From: Sergei Barannikov Date: Tue, 16 May 2023 17:39:16 +0000 (+0300) Subject: [clang] Convert NVPTX OpenMP tests to opaque pointers X-Git-Tag: upstream/17.0.6~8255 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9beb817cc8db28e2755cd9e230256121d0edc246;p=platform%2Fupstream%2Fllvm.git [clang] Convert NVPTX OpenMP tests to opaque pointers Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D150694 --- diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index e0ee1dd..bd9860a 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefixes=CHECK-64 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefixes=CHECK-32 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefixes=CHECK-32-EX %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefixes=CHECK-64 %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefixes=CHECK-32 %s +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefixes=CHECK-32-EX %s // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -279,33 +279,31 @@ int a; // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -315,178 +313,173 @@ int a; // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-64: omp_if.then: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK-64-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK-64-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK-64-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK-64-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK-64-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK-64-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP12]] to ptr +// CHECK-64-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP19]] to i1 // CHECK-64-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL4]] to i32 -// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP20]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP21]], i64 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP20]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP28]], 9 // CHECK-64-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] -// CHECK-64: cond.true8: +// CHECK-64: cond.true7: // CHECK-64-NEXT: br label [[COND_END10:%.*]] -// CHECK-64: cond.false9: -// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64: cond.false8: +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: br label [[COND_END10]] -// CHECK-64: cond.end10: +// CHECK-64: cond.end9: // CHECK-64-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] -// CHECK-64-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i32 [[COND11]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-64: omp_if.else: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] -// CHECK-64: omp.inner.for.cond12: -// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64: omp.inner.for.cond11: +// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP31]], 10 // CHECK-64-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK-64: omp.inner.for.body14: -// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64: omp.inner.for.body13: +// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64 -// CHECK-64-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP35:%.*]] = zext i32 [[TMP34]] to i64 -// CHECK-64-NEXT: [[TMP36:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TMP36:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP36]] to i1 -// CHECK-64-NEXT: [[CONV17:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED16]] to i8* // CHECK-64-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 -// CHECK-64-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 -// CHECK-64-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED16]], align 8 -// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP39:%.*]] = inttoptr i64 [[TMP33]] to i8* -// CHECK-64-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 -// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP35]] to i8* -// CHECK-64-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 8 -// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 2 -// CHECK-64-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP37]] to i8* -// CHECK-64-NEXT: store i8* [[TMP43]], i8** [[TMP42]], align 8 -// CHECK-64-NEXT: [[TMP44:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: store i8 [[FROMBOOL18]], ptr [[DOTCAPTURE_EXPR__CASTED16]], align 1 +// CHECK-64-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED16]], align 8 +// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP39:%.*]] = inttoptr i64 [[TMP33]] to ptr +// CHECK-64-NEXT: store ptr [[TMP39]], ptr [[TMP38]], align 8 +// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP35]] to ptr +// CHECK-64-NEXT: store ptr [[TMP41]], ptr [[TMP40]], align 8 +// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP37]] to ptr +// CHECK-64-NEXT: store ptr [[TMP43]], ptr [[TMP42]], align 8 +// CHECK-64-NEXT: [[TMP44:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP44]] to i1 // CHECK-64-NEXT: [[TMP45:%.*]] = zext i1 [[TOBOOL20]] to i32 -// CHECK-64-NEXT: [[TMP46:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP45]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1 to i8*), i8* null, i8** [[TMP46]], i64 3) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP45]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS19]], i64 3) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK-64: omp.inner.for.inc21: -// CHECK-64-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64: omp.inner.for.inc19: +// CHECK-64-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK-64-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK-64-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] -// CHECK-64-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP53]], 9 // CHECK-64-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] -// CHECK-64: cond.true26: +// CHECK-64: cond.true24: // CHECK-64-NEXT: br label [[COND_END28:%.*]] -// CHECK-64: cond.false27: -// CHECK-64-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64: cond.false25: +// CHECK-64-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END28]] -// CHECK-64: cond.end28: +// CHECK-64: cond.end26: // CHECK-64-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP54]], [[COND_FALSE27]] ] -// CHECK-64-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND29]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP55]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] -// CHECK-64: omp.inner.for.end30: +// CHECK-64: omp.inner.for.end28: // CHECK-64-NEXT: br label [[OMP_IF_END]] // CHECK-64: omp_if.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 // CHECK-64-NEXT: br i1 [[TMP57]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 @@ -497,104 +490,103 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-64: omp_if.then: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] // CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP136]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-64: omp_if.else: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5:%.*]] -// CHECK-64: omp.inner.for.cond5: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64: omp.inner.for.cond4: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP15]] // CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END14:%.*]] -// CHECK-64: omp.inner.for.body8: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64: omp.inner.for.body7: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// CHECK-64-NEXT: store i32 [[ADD10]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE11:%.*]] -// CHECK-64: omp.body.continue11: +// CHECK-64: omp.body.continue10: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC12:%.*]] -// CHECK-64: omp.inner.for.inc12: -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64: omp.inner.for.inc11: +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP139:![0-9]+]] -// CHECK-64: omp.inner.for.end14: +// CHECK-64: omp.inner.for.end13: // CHECK-64-NEXT: br label [[OMP_IF_END]] // CHECK-64: omp_if.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 @@ -605,94 +597,93 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-64: omp_if.then: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] // CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP140]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-64: omp_if.else: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5:%.*]] -// CHECK-64: omp.inner.for.cond5: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64: omp.inner.for.cond4: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP15]] // CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END14:%.*]] -// CHECK-64: omp.inner.for.body8: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64: omp.inner.for.body7: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// CHECK-64-NEXT: store i32 [[ADD10]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE11:%.*]] -// CHECK-64: omp.body.continue11: +// CHECK-64: omp.body.continue10: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC12:%.*]] -// CHECK-64: omp.inner.for.inc12: -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64: omp.inner.for.inc11: +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP143:![0-9]+]] -// CHECK-64: omp.inner.for.end14: +// CHECK-64: omp.inner.for.end13: // CHECK-64-NEXT: br label [[OMP_IF_END]] // CHECK-64: omp_if.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -703,25 +694,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -729,95 +720,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -827,64 +817,64 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -895,25 +885,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -921,95 +911,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1019,55 +1008,55 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP153]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -1078,25 +1067,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -1104,95 +1093,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1202,62 +1190,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -1268,25 +1256,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -1294,95 +1282,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1392,62 +1379,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -1458,25 +1445,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -1484,95 +1471,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1582,62 +1568,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -1648,25 +1634,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -1674,95 +1660,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1772,62 +1757,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -1840,31 +1825,29 @@ int a; // CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1874,108 +1857,104 @@ int a; // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK-64-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) -// CHECK-64-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[A1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK-64-NEXT: store i32 [[TMP10]], i32* [[CONV3]], align 4 -// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK-64-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to i8* -// CHECK-64-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP18]], i64 3) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK-64-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK-64-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK-64: cond.true7: +// CHECK-64: cond.true6: // CHECK-64-NEXT: br label [[COND_END9:%.*]] -// CHECK-64: cond.false8: -// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64: cond.false7: +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END9]] -// CHECK-64: cond.end9: +// CHECK-64: cond.end8: // CHECK-64-NEXT: [[COND10:%.*]] = phi i32 [ 9, [[COND_TRUE7]] ], [ [[TMP26]], [[COND_FALSE8]] ] -// CHECK-64-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND10]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-64: .omp.lastprivate.then: -// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-64-NEXT: store i32 [[TMP30]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP30]], ptr [[A_ADDR]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-64: .omp.lastprivate.done: -// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i64 4) +// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -1987,60 +1966,59 @@ int a; // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV4:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-64-NEXT: store i32 [[TMP8]], i32* [[A3]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-64-NEXT: store i32 [[TMP8]], ptr [[A3]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-64: .omp.lastprivate.then: -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[A_ADDR]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-64: .omp.lastprivate.done: // CHECK-64-NEXT: ret void @@ -2051,25 +2029,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -2077,88 +2055,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2168,59 +2145,59 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -2229,25 +2206,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -2255,88 +2232,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2346,50 +2322,50 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -2398,25 +2374,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -2424,88 +2400,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2515,51 +2490,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -2574,25 +2549,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -2600,88 +2575,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2691,51 +2665,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -2750,25 +2724,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -2776,88 +2750,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2867,51 +2840,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -2926,25 +2899,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -2952,88 +2925,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3043,51 +3015,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -3102,25 +3074,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3129,95 +3101,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3227,55 +3198,55 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP195]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -3286,25 +3257,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3313,97 +3284,95 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* -// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i64 12, i1 false) -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 @"__const..b", i64 12, i1 false) +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK-64-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP15]], i64 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK-64-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK-64-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK-64-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3413,64 +3382,64 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -3481,25 +3450,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -3507,81 +3476,78 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) -// CHECK-64-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** -// CHECK-64-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) -// CHECK-64-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK-64-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK-64-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined to i8*), i8* bitcast (void (i16, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper to i8*), i8** [[TMP15]], i64 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK-64-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK-64-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 8 -// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[B]], i64 4) -// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[C]], i64 8) +// CHECK-64-NEXT: store ptr [[B]], ptr [[C]], align 8 +// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4) +// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[C]], i64 8) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3591,55 +3557,55 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP207]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -3651,19 +3617,17 @@ int a; // CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK-64-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i64* -// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* -// CHECK-64-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i64 [[TMP5]], i64 [[TMP8]]) #[[ATTR2]] +// CHECK-64-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], i64 [[TMP5]], i64 [[TMP8]]) #[[ATTR2]] // CHECK-64-NEXT: ret void // // @@ -3672,25 +3636,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -3698,95 +3662,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3796,62 +3759,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -3862,25 +3825,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -3888,95 +3851,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3986,62 +3948,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -4052,25 +4014,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -4078,95 +4040,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -4176,62 +4137,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -4242,25 +4203,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -4268,95 +4229,94 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 // CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -4366,62 +4326,62 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: // CHECK-64-NEXT: ret void @@ -4432,25 +4392,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -4458,88 +4418,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -4549,50 +4508,50 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -4601,25 +4560,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -4627,88 +4586,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -4718,59 +4676,59 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -4779,25 +4737,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -4805,88 +4763,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -4896,50 +4853,50 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -4948,25 +4905,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -4974,88 +4931,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -5065,51 +5021,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -5124,25 +5080,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -5150,88 +5106,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -5241,51 +5196,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -5300,25 +5255,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -5326,88 +5281,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -5417,51 +5371,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -5476,25 +5430,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -5502,88 +5456,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -5593,51 +5546,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -5652,25 +5605,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -5678,88 +5631,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -5769,50 +5721,50 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -5821,25 +5773,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -5847,88 +5799,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -5938,59 +5889,59 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -5999,25 +5950,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -6025,88 +5976,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -6116,50 +6066,50 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-64-NEXT: ret void // // @@ -6168,25 +6118,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -6194,88 +6144,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -6285,51 +6234,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -6344,25 +6293,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -6370,88 +6319,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -6461,51 +6409,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -6520,25 +6468,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -6546,88 +6494,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -6637,51 +6584,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -6696,25 +6643,25 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -6722,88 +6669,87 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: // CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] -// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -6813,51 +6759,51 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -6871,30 +6817,28 @@ int a; // CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined to i8*), i8* null, i8** [[TMP4]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6902,93 +6846,92 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6996,76 +6939,75 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7073,93 +7015,92 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7167,41 +7108,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -7214,25 +7155,24 @@ int a; // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7240,41 +7180,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -7287,25 +7227,24 @@ int a; // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7313,41 +7252,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -7360,25 +7299,24 @@ int a; // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7386,41 +7324,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -7434,30 +7372,28 @@ int a; // CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined to i8*), i8* null, i8** [[TMP4]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7465,101 +7401,100 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7567,84 +7502,83 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7652,101 +7586,100 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7754,80 +7687,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7835,80 +7767,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7916,80 +7847,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7997,80 +7927,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8078,81 +8007,80 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-64-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8160,84 +8088,83 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8245,101 +8172,100 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8347,80 +8273,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8428,80 +8353,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8509,80 +8433,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8590,80 +8513,79 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8671,93 +8593,92 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8765,76 +8686,75 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8842,93 +8762,92 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8936,41 +8855,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -8983,25 +8902,24 @@ int a; // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9009,41 +8927,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -9056,25 +8974,24 @@ int a; // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9082,41 +8999,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -9129,25 +9046,24 @@ int a; // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9155,41 +9071,41 @@ int a; // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -9206,33 +9122,31 @@ int a; // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -9242,174 +9156,169 @@ int a; // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32: omp_if.then: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK-32-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK-32-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK-32-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to ptr +// CHECK-32-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 // CHECK-32-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL4]] to i32 -// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP19]], i32 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-32-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] -// CHECK-32: cond.true8: +// CHECK-32: cond.true7: // CHECK-32-NEXT: br label [[COND_END10:%.*]] -// CHECK-32: cond.false9: -// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32: cond.false8: +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: br label [[COND_END10]] -// CHECK-32: cond.end10: +// CHECK-32: cond.end9: // CHECK-32-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP27]], [[COND_FALSE9]] ] -// CHECK-32-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[COND11]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32: omp_if.else: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] -// CHECK-32: omp.inner.for.cond12: -// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32: omp.inner.for.cond11: +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP29]], 10 // CHECK-32-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK-32: omp.inner.for.body14: -// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP32:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32: omp.inner.for.body13: +// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP32]] to i1 -// CHECK-32-NEXT: [[CONV17:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED16]] to i8* // CHECK-32-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 -// CHECK-32-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 -// CHECK-32-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED16]], align 4 -// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to i8* -// CHECK-32-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to i8* -// CHECK-32-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 -// CHECK-32-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to i8* -// CHECK-32-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK-32-NEXT: [[TMP40:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: store i8 [[FROMBOOL18]], ptr [[DOTCAPTURE_EXPR__CASTED16]], align 1 +// CHECK-32-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED16]], align 4 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to ptr +// CHECK-32-NEXT: store ptr [[TMP35]], ptr [[TMP34]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to ptr +// CHECK-32-NEXT: store ptr [[TMP37]], ptr [[TMP36]], align 4 +// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to ptr +// CHECK-32-NEXT: store ptr [[TMP39]], ptr [[TMP38]], align 4 +// CHECK-32-NEXT: [[TMP40:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP40]] to i1 // CHECK-32-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL20]] to i32 -// CHECK-32-NEXT: [[TMP42:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1 to i8*), i8* null, i8** [[TMP42]], i32 3) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS19]], i32 3) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK-32: omp.inner.for.inc21: -// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32: omp.inner.for.inc19: +// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] -// CHECK-32-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] -// CHECK-32-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK-32-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP49]], 9 // CHECK-32-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] -// CHECK-32: cond.true26: +// CHECK-32: cond.true24: // CHECK-32-NEXT: br label [[COND_END28:%.*]] -// CHECK-32: cond.false27: -// CHECK-32-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32: cond.false25: +// CHECK-32-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END28]] -// CHECK-32: cond.end28: +// CHECK-32: cond.end26: // CHECK-32-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP50]], [[COND_FALSE27]] ] -// CHECK-32-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP51]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND29]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP51]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] -// CHECK-32: omp.inner.for.end30: +// CHECK-32: omp.inner.for.end28: // CHECK-32-NEXT: br label [[OMP_IF_END]] // CHECK-32: omp_if.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 // CHECK-32-NEXT: br i1 [[TMP53]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 @@ -9420,100 +9329,99 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32: omp_if.then: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32: omp_if.else: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] // CHECK-32: omp.inner.for.cond2: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] // CHECK-32: omp.inner.for.body4: -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] // CHECK-32: omp.body.continue7: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] // CHECK-32: omp.inner.for.inc8: -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP139:![0-9]+]] // CHECK-32: omp.inner.for.end10: // CHECK-32-NEXT: br label [[OMP_IF_END]] // CHECK-32: omp_if.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 @@ -9524,90 +9432,89 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32: omp_if.then: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32: omp_if.else: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] // CHECK-32: omp.inner.for.cond2: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] // CHECK-32: omp.inner.for.body4: -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] // CHECK-32: omp.body.continue7: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] // CHECK-32: omp.inner.for.inc8: -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP143:![0-9]+]] // CHECK-32: omp.inner.for.end10: // CHECK-32-NEXT: br label [[OMP_IF_END]] // CHECK-32: omp_if.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -9618,25 +9525,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -9644,93 +9551,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -9740,62 +9646,62 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -9806,25 +9712,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -9832,93 +9738,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -9928,52 +9833,52 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -9984,25 +9889,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -10010,93 +9915,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -10106,60 +10010,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -10170,25 +10074,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -10196,93 +10100,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -10292,60 +10195,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -10356,25 +10259,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -10382,93 +10285,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -10478,60 +10380,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -10542,25 +10444,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -10568,93 +10470,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -10664,60 +10565,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -10730,29 +10631,29 @@ int a; // CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10762,104 +10663,102 @@ int a; // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK-32-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[A1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to i8* -// CHECK-32-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP16]], i32 3) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to ptr +// CHECK-32-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP23]], 9 // CHECK-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK-32: cond.true6: // CHECK-32-NEXT: br label [[COND_END8:%.*]] // CHECK-32: cond.false7: -// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END8]] // CHECK-32: cond.end8: // CHECK-32-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP24]], [[COND_FALSE7]] ] -// CHECK-32-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32: .omp.lastprivate.then: -// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP28]], ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32: .omp.lastprivate.done: -// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) +// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -10871,56 +10770,56 @@ int a; // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[A1]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], ptr [[A1]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32: .omp.lastprivate.then: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32: .omp.lastprivate.done: // CHECK-32-NEXT: ret void @@ -10931,25 +10830,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -10957,86 +10856,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -11046,57 +10944,57 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -11105,25 +11003,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -11131,86 +11029,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -11220,47 +11117,47 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -11269,25 +11166,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -11295,86 +11192,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -11384,49 +11280,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -11441,25 +11337,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -11467,86 +11363,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -11556,49 +11451,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -11613,25 +11508,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -11639,86 +11534,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -11728,49 +11622,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -11785,25 +11679,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -11811,86 +11705,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -11900,49 +11793,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -11957,25 +11850,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -11984,93 +11877,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -12080,52 +11972,52 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -12136,25 +12028,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -12163,95 +12055,93 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* -// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 @"__const..b", i32 12, i1 false) +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK-32-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK-32-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -12261,62 +12151,62 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -12327,25 +12217,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -12353,79 +12243,76 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK-32-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** -// CHECK-32-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK-32-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined to i8*), i8* bitcast (void (i16, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper to i8*), i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK-32-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP204]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 4 -// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[B]], i32 4) -// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 4) +// CHECK-32-NEXT: store ptr [[B]], ptr [[C]], align 4 +// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[B]], i32 4) +// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 4) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -12435,52 +12322,52 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -12492,19 +12379,17 @@ int a; // CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK-32-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] +// CHECK-32-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] // CHECK-32-NEXT: ret void // // @@ -12513,25 +12398,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -12539,93 +12424,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -12635,60 +12519,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -12699,25 +12583,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -12725,93 +12609,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -12821,60 +12704,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -12885,25 +12768,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -12911,93 +12794,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -13007,60 +12889,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -13071,25 +12953,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -13097,93 +12979,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -13193,60 +13074,60 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: // CHECK-32-NEXT: ret void @@ -13257,25 +13138,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -13283,86 +13164,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -13372,47 +13252,47 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -13421,25 +13301,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -13447,86 +13327,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -13536,57 +13415,57 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -13595,25 +13474,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -13621,86 +13500,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -13710,47 +13588,47 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -13759,25 +13637,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -13785,86 +13663,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -13874,49 +13751,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -13931,25 +13808,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -13957,86 +13834,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -14046,49 +13922,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -14103,25 +13979,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -14129,86 +14005,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -14218,49 +14093,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -14275,25 +14150,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -14301,86 +14176,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -14390,49 +14264,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -14447,25 +14321,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -14473,86 +14347,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -14562,47 +14435,47 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -14611,25 +14484,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -14637,86 +14510,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -14726,57 +14598,57 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -14785,25 +14657,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -14811,86 +14683,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -14900,47 +14771,47 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-NEXT: ret void // // @@ -14949,25 +14820,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -14975,86 +14846,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -15064,49 +14934,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -15121,25 +14991,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -15147,86 +15017,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -15236,49 +15105,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -15293,25 +15162,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -15319,86 +15188,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -15408,49 +15276,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -15465,25 +15333,25 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -15491,86 +15359,85 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: // CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -15580,49 +15447,49 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -15636,30 +15503,28 @@ int a; // CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15667,93 +15532,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15761,76 +15625,75 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15838,93 +15701,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15932,41 +15794,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -15979,25 +15841,24 @@ int a; // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16005,41 +15866,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -16052,25 +15913,24 @@ int a; // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16078,41 +15938,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -16125,25 +15985,24 @@ int a; // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16151,41 +16010,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -16199,30 +16058,28 @@ int a; // CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16230,101 +16087,100 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16332,84 +16188,83 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16417,101 +16272,100 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16519,80 +16373,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16600,80 +16453,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16681,80 +16533,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16762,80 +16613,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16843,81 +16693,80 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16925,84 +16774,83 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17010,101 +16858,100 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17112,80 +16959,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17193,80 +17039,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17274,80 +17119,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17355,80 +17199,79 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17436,93 +17279,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17530,76 +17372,75 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17607,93 +17448,92 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17701,41 +17541,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -17748,25 +17588,24 @@ int a; // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17774,41 +17613,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -17821,25 +17660,24 @@ int a; // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17847,41 +17685,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -17894,25 +17732,24 @@ int a; // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17920,41 +17757,41 @@ int a; // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -17971,33 +17808,31 @@ int a; // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-EX-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -18007,174 +17842,169 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32-EX: omp_if.then: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK-32-EX-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 // CHECK-32-EX-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL4]] to i32 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP19]], i32 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] -// CHECK-32-EX: cond.true8: +// CHECK-32-EX: cond.true7: // CHECK-32-EX-NEXT: br label [[COND_END10:%.*]] -// CHECK-32-EX: cond.false9: -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX: cond.false8: +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: br label [[COND_END10]] -// CHECK-32-EX: cond.end10: +// CHECK-32-EX: cond.end9: // CHECK-32-EX-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP27]], [[COND_FALSE9]] ] -// CHECK-32-EX-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[COND11]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32-EX: omp_if.else: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] -// CHECK-32-EX: omp.inner.for.cond12: -// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX: omp.inner.for.cond11: +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP29]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK-32-EX: omp.inner.for.body14: -// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX: omp.inner.for.body13: +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP32]] to i1 -// CHECK-32-EX-NEXT: [[CONV17:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED16]] to i8* // CHECK-32-EX-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 -// CHECK-32-EX-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 -// CHECK-32-EX-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED16]], align 4 -// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK-32-EX-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK-32-EX-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 -// CHECK-32-EX-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK-32-EX-NEXT: [[TMP40:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL18]], ptr [[DOTCAPTURE_EXPR__CASTED16]], align 1 +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED16]], align 4 +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP35]], ptr [[TMP34]], align 4 +// CHECK-32-EX-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP37]], ptr [[TMP36]], align 4 +// CHECK-32-EX-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP39]], ptr [[TMP38]], align 4 +// CHECK-32-EX-NEXT: [[TMP40:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP40]] to i1 // CHECK-32-EX-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL20]] to i32 -// CHECK-32-EX-NEXT: [[TMP42:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1 to i8*), i8* null, i8** [[TMP42]], i32 3) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS19]], i32 3) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK-32-EX: omp.inner.for.inc21: -// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX: omp.inner.for.inc19: +// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] -// CHECK-32-EX-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] -// CHECK-32-EX-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK-32-EX-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP49]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] -// CHECK-32-EX: cond.true26: +// CHECK-32-EX: cond.true24: // CHECK-32-EX-NEXT: br label [[COND_END28:%.*]] -// CHECK-32-EX: cond.false27: -// CHECK-32-EX-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX: cond.false25: +// CHECK-32-EX-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END28]] -// CHECK-32-EX: cond.end28: +// CHECK-32-EX: cond.end26: // CHECK-32-EX-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP50]], [[COND_FALSE27]] ] -// CHECK-32-EX-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP51]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND29]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP51]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] -// CHECK-32-EX: omp.inner.for.end30: +// CHECK-32-EX: omp.inner.for.end28: // CHECK-32-EX-NEXT: br label [[OMP_IF_END]] // CHECK-32-EX: omp_if.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP53]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 @@ -18185,100 +18015,99 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32-EX: omp_if.then: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32-EX: omp_if.else: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] // CHECK-32-EX: omp.inner.for.cond2: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] // CHECK-32-EX: omp.inner.for.body4: -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] // CHECK-32-EX: omp.body.continue7: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] // CHECK-32-EX: omp.inner.for.inc8: -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP139:![0-9]+]] // CHECK-32-EX: omp.inner.for.end10: // CHECK-32-EX-NEXT: br label [[OMP_IF_END]] // CHECK-32-EX: omp_if.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 @@ -18289,90 +18118,89 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32-EX: omp_if.then: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32-EX: omp_if.else: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] // CHECK-32-EX: omp.inner.for.cond2: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] // CHECK-32-EX: omp.inner.for.body4: -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] // CHECK-32-EX: omp.body.continue7: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] // CHECK-32-EX: omp.inner.for.inc8: -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP143:![0-9]+]] // CHECK-32-EX: omp.inner.for.end10: // CHECK-32-EX-NEXT: br label [[OMP_IF_END]] // CHECK-32-EX: omp_if.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -18383,25 +18211,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -18409,93 +18237,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -18505,62 +18332,62 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -18571,25 +18398,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -18597,93 +18424,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -18693,52 +18519,52 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -18749,25 +18575,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -18775,93 +18601,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -18871,60 +18696,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -18935,25 +18760,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -18961,93 +18786,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -19057,60 +18881,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -19121,25 +18945,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -19147,93 +18971,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -19243,60 +19066,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -19307,25 +19130,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -19333,93 +19156,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -19429,60 +19251,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -19495,29 +19317,29 @@ int a; // CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -19527,104 +19349,102 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK-32-EX-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[A1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP16]], i32 3) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP23]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK-32-EX: cond.true6: // CHECK-32-EX-NEXT: br label [[COND_END8:%.*]] // CHECK-32-EX: cond.false7: -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END8]] // CHECK-32-EX: cond.end8: // CHECK-32-EX-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP24]], [[COND_FALSE7]] ] -// CHECK-32-EX-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32-EX: .omp.lastprivate.then: -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP28]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP28]], ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32-EX: .omp.lastprivate.done: -// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -19636,56 +19456,56 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], ptr [[A1]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32-EX: .omp.lastprivate.then: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32-EX: .omp.lastprivate.done: // CHECK-32-EX-NEXT: ret void @@ -19696,25 +19516,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -19722,86 +19542,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -19811,57 +19630,57 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -19870,25 +19689,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -19896,86 +19715,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -19985,47 +19803,47 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -20034,25 +19852,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -20060,86 +19878,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -20149,49 +19966,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -20206,25 +20023,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -20232,86 +20049,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -20321,49 +20137,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -20378,25 +20194,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -20404,86 +20220,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -20493,49 +20308,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -20550,25 +20365,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -20576,86 +20391,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -20665,49 +20479,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -20722,25 +20536,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -20749,93 +20563,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -20845,52 +20658,52 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -20901,25 +20714,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -20928,95 +20741,93 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* -// CHECK-32-EX-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 @"__const..b", i32 12, i1 false) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -21026,62 +20837,62 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -21092,25 +20903,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -21118,79 +20929,76 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK-32-EX-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** -// CHECK-32-EX-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK-32-EX-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined to i8*), i8* bitcast (void (i16, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper to i8*), i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP204]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[B]], i32 4) -// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 4) +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[C]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[B]], i32 4) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 4) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -21200,52 +21008,52 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -21257,19 +21065,17 @@ int a; // CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK-32-EX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] // CHECK-32-EX-NEXT: ret void // // @@ -21278,25 +21084,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -21304,93 +21110,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -21400,60 +21205,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -21464,25 +21269,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -21490,93 +21295,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -21586,60 +21390,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -21650,25 +21454,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -21676,93 +21480,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -21772,60 +21575,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -21836,25 +21639,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -21862,93 +21665,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -21958,60 +21760,60 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: // CHECK-32-EX-NEXT: ret void @@ -22022,25 +21824,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -22048,86 +21850,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -22137,47 +21938,47 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -22186,25 +21987,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -22212,86 +22013,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -22301,57 +22101,57 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -22360,25 +22160,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -22386,86 +22186,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -22475,47 +22274,47 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -22524,25 +22323,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -22550,86 +22349,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -22639,49 +22437,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -22696,25 +22494,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -22722,86 +22520,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -22811,49 +22608,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -22868,25 +22665,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -22894,86 +22691,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -22983,49 +22779,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -23040,25 +22836,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -23066,86 +22862,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -23155,49 +22950,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -23212,25 +23007,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -23238,86 +23033,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -23327,47 +23121,47 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -23376,25 +23170,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -23402,86 +23196,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -23491,57 +23284,57 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -23550,25 +23343,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -23576,86 +23369,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -23665,47 +23457,47 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK-32-EX-NEXT: ret void // // @@ -23714,25 +23506,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -23740,86 +23532,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -23829,49 +23620,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -23886,25 +23677,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -23912,86 +23703,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -24001,49 +23791,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -24058,25 +23848,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -24084,86 +23874,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -24173,49 +23962,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -24230,25 +24019,25 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -24256,86 +24045,85 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: // CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] -// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -24345,49 +24133,49 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -24401,30 +24189,28 @@ int a; // CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24432,93 +24218,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24526,76 +24311,75 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24603,93 +24387,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24697,41 +24480,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -24744,25 +24527,24 @@ int a; // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24770,41 +24552,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -24817,25 +24599,24 @@ int a; // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24843,41 +24624,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -24890,25 +24671,24 @@ int a; // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24916,41 +24696,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -24964,30 +24744,28 @@ int a; // CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24995,101 +24773,100 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25097,84 +24874,83 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25182,101 +24958,100 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25284,80 +25059,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25365,80 +25139,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25446,80 +25219,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25527,80 +25299,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25608,81 +25379,80 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25690,84 +25460,83 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25775,101 +25544,100 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25877,80 +25645,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25958,80 +25725,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26039,80 +25805,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26120,80 +25885,79 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26201,93 +25965,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26295,76 +26058,75 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26372,93 +26134,92 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26466,41 +26227,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -26513,25 +26274,24 @@ int a; // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26539,41 +26299,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -26586,25 +26346,24 @@ int a; // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26612,41 +26371,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -26659,25 +26418,24 @@ int a; // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26685,41 +26443,41 @@ int a; // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] diff --git a/clang/test/OpenMP/nvptx_param_translate.c b/clang/test/OpenMP/nvptx_param_translate.c index 537702f..8ee3fba 100644 --- a/clang/test/OpenMP/nvptx_param_translate.c +++ b/clang/test/OpenMP/nvptx_param_translate.c @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s // expected-no-diagnostics -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** % -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* % +// CHECK: store ptr @{{.+}}, ptr [[FN:%[^,]+]], +// CHECK: [[MAP_FN:%.+]] = load ptr, ptr [[FN]] +// CHECK: call void [[MAP_FN]](ptr % int main() { double a, b; diff --git a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp index 875cc47..1390777 100644 --- a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -1,8 +1,8 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -14,7 +14,6 @@ struct TT { }; // TCHECK-DAG: [[TTII:%.+]] = type { i32, i32 } -// TCHECK-DAG: [[TTIC:%.+]] = type { i8, i8 } // TCHECK-DAG: [[TT:%.+]] = type { i64, i8 } // TCHECK-DAG: [[S1:%.+]] = type { double } @@ -33,11 +32,11 @@ int foo(int n, double *ptr) { b[a] += e.X; } - // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}([10 x float] addrspace(1)* noalias noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[A_IN:%.+]], [[TTII]]* noalias noundef [[E_IN:%.+]]) + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(ptr addrspace(1) noalias noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[A_IN:%.+]], ptr noalias noundef [[E_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca [[TTII]], // TCHECK: alloca i{{[0-9]+}}, - // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], + // TCHECK: store i{{[0-9]+}} [[A_IN]], ptr [[A_ADDR]], // TCHECK: ret void #pragma omp target firstprivate(aa, b, c, d) @@ -51,56 +50,50 @@ int foo(int n, double *ptr) { // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the // target region - // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A2_IN:%.+]], [10 x float]*{{.*}} [[B_IN:%.+]], [5 x [10 x double]]*{{.*}} [[C_IN:%.+]], [[TT]]*{{.*}} [[D_IN:%.+]]) + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A2_IN:%.+]], ptr{{.*}} [[B_IN:%.+]], ptr{{.*}} [[C_IN:%.+]], ptr{{.*}} [[D_IN:%.+]]) // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*, - // TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*, - // TCHECK: [[D_ADDR:%.+]] = alloca [[TT]]*, + // TCHECK: [[B_ADDR:%.+]] = alloca ptr, + // TCHECK: [[C_ADDR:%.+]] = alloca ptr, + // TCHECK: [[D_ADDR:%.+]] = alloca ptr, // TCHECK-NOT: alloca i{{[0-9]+}}, // TCHECK: [[B_PRIV:%.+]] = alloca [10 x float], // TCHECK: [[C_PRIV:%.+]] = alloca [5 x [10 x double]], // TCHECK: [[D_PRIV:%.+]] = alloca [[TT]], - // TCHECK: store i{{[0-9]+}} [[A2_IN]], i{{[0-9]+}}* [[A2_ADDR]], - // TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]], - // TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]], - // TCHECK: store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]], - // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]], - // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** % - // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], - // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** % - // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]], - // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** % + // TCHECK: store i{{[0-9]+}} [[A2_IN]], ptr [[A2_ADDR]], + // TCHECK: store ptr [[B_IN]], ptr [[B_ADDR]], + // TCHECK: store ptr [[C_IN]], ptr [[C_ADDR]], + // TCHECK: store ptr [[D_IN]], ptr [[D_ADDR]], + // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr [[B_ADDR]], + // TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr % + // TCHECK: [[C_ADDR_REF:%.+]] = load ptr, ptr [[C_ADDR]], + // TCHECK: [[C_ADDR_REF:%.+]] = load ptr, ptr % + // TCHECK: [[D_ADDR_REF:%.+]] = load ptr, ptr [[D_ADDR]], + // TCHECK: [[D_ADDR_REF:%.+]] = load ptr, ptr % // firstprivate(aa): a_priv = a_in // firstprivate(b): memcpy(b_priv,b_in) - // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8* - // TCHECK: [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_ADDR_REF_BCAST]], {{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[B_PRIV]], ptr align {{[0-9]+}} [[B_ADDR_REF]], {{.+}}) // firstprivate(c) - // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8* - // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[C_PRIV]], ptr align {{[0-9]+}} [[C_ADDR_REF]],{{.+}}) // firstprivate(d) - // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8* - // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[D_PRIV_BCAST]], i8* align {{[0-9]+}} [[D_IN_BCAST]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[D_PRIV]], ptr align {{[0-9]+}} [[D_ADDR_REF]],{{.+}}) - // TCHECK: load i16, i16* [[A2_ADDR]], + // TCHECK: load i16, ptr [[A2_ADDR]], #pragma omp target firstprivate(ptr) { ptr[0]++; } - // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]]) - // TCHECK: [[PTR_ADDR:%.+]] = alloca double*, - // TCHECK-NOT: alloca double*, - // TCHECK: store double* [[PTR_IN]], double** [[PTR_ADDR]], - // TCHECK: [[PTR_IN_REF:%.+]] = load double*, double** [[PTR_ADDR]], - // TCHECK-NOT: store double* [[PTR_IN_REF]], double** {{%.+}}, + // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(ptr noundef [[PTR_IN:%.+]]) + // TCHECK: [[PTR_ADDR:%.+]] = alloca ptr, + // TCHECK-NOT: alloca ptr, + // TCHECK: store ptr [[PTR_IN]], ptr [[PTR_ADDR]], + // TCHECK: [[PTR_IN_REF:%.+]] = load ptr, ptr [[PTR_ADDR]], + // TCHECK-NOT: store ptr [[PTR_IN_REF]], ptr {{%.+}}, return a; } @@ -140,29 +133,26 @@ void fconst(const tx t) { { } } -// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A_IN:%.+]], i{{[0-9]+}}{{.*}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A_IN:%.+]], i{{[0-9]+}}{{.*}} [[A3_IN:%.+]], ptr {{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK: [[B_ADDR:%.+]] = alloca ptr, // TCHECK-NOT: alloca i{{[0-9]+}}, // TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], -// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], -// TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]], -// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % +// TCHECK: store i{{[0-9]+}} [[A_IN]], ptr [[A_ADDR]], +// TCHECK: store i{{[0-9]+}} [[A3_IN]], ptr [[A3_ADDR]], +// TCHECK: store ptr [[B_IN]], ptr [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr % // firstprivate(a): a_priv = a_in // firstprivate(aaa) -// TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* +// TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, ptr // firstprivate(b) -// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* -// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) - +// TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[B_PRIV]], ptr align {{[0-9]+}} [[B_ADDR_REF]],{{.+}}) // TCHECK: ret void struct S1 { @@ -179,18 +169,17 @@ struct S1 { return (int)b; } - // TCHECK: define internal void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]]) - // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, + // TCHECK: define internal void @__omp_offloading_{{.+}}(ptr noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]]) + // TCHECK: [[TH_ADDR:%.+]] = alloca ptr, // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca i{{[0-9]+}}, - // TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]], - // TCHECK: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[B_ADDR]], - // TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]], - // TCHECK-64: [[B_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[B_ADDR]] to i{{[0-9]+}}* + // TCHECK: store ptr [[TH]], ptr [[TH_ADDR]], + // TCHECK: store i{{[0-9]+}} [[B_IN]], ptr [[B_ADDR]], + // TCHECK: [[TH_ADDR_REF:%.+]] = load ptr, ptr [[TH_ADDR]], // firstprivate(b) - // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* + // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, ptr // TCHECK: ret void }; @@ -211,23 +200,21 @@ int bar(int n, double *ptr) { // template -// TCHECK: define internal void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} noundef [[B_IN:%.+]]) +// TCHECK: define internal void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], ptr{{.+}} noundef [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK: [[B_ADDR:%.+]] = alloca ptr, // TCHECK-NOT: alloca i{{[0-9]+}}, // TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], -// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], -// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % +// TCHECK: store i{{[0-9]+}} [[A_IN]], ptr [[A_ADDR]], +// TCHECK: store ptr [[B_IN]], ptr [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load ptr, ptr % // firstprivate(a) -// TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* +// TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, ptr // firstprivate(b) -// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* -// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) +// TCHECK: call void @llvm.memcpy.{{.+}}(ptr align {{[0-9]+}} [[B_PRIV]], ptr align {{[0-9]+}} [[B_ADDR_REF]],{{.+}}) // TCHECK: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index e94befa..f7032d8 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64 -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32 -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -98,58 +98,54 @@ int bar(int n){ #endif // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24 -// CHECK-64-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -// CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* -// CHECK-64-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i64 1) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[E1:%.*]] = alloca double, align 8 -// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-64-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 +// CHECK-64-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 // CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK-64-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-64-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i64 8, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-64-NEXT: store double [[ADD]], ptr [[E1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: store ptr [[E1]], ptr [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) // CHECK-64-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 // CHECK-64-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-64: .omp.reduction.then: -// CHECK-64-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK-64-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP0]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load double, ptr [[E1]], align 8 // CHECK-64-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] -// CHECK-64-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK-64-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-64: .omp.reduction.done: @@ -157,40 +153,34 @@ int bar(int n){ // // // CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-64-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-64-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** -// CHECK-64-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i64 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* -// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK-64-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 1 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 // CHECK-64-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-64-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 // CHECK-64-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK-64-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 8 -// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 -// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 -// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK-64-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i64 1 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 // CHECK-64-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-64-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-64-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -205,9 +195,7 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] // CHECK-64-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-64: then: -// CHECK-64-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK-64-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]] +// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]] // CHECK-64-NEXT: br label [[IFCONT:%.*]] // CHECK-64: else: // CHECK-64-NEXT: br label [[IFCONT]] @@ -217,14 +205,12 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] // CHECK-64-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK-64: then4: -// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** -// CHECK-64-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 8 -// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** -// CHECK-64-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 8 -// CHECK-64-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 -// CHECK-64-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 +// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 8 +// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 8 +// CHECK-64-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 +// CHECK-64-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 // CHECK-64-NEXT: br label [[IFCONT6:%.*]] // CHECK-64: else5: // CHECK-64-NEXT: br label [[IFCONT6]] @@ -233,143 +219,136 @@ int bar(int n){ // // // CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* -// CHECK-64-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK-64-NEXT: br label [[PRECOND:%.*]] // CHECK-64: precond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 // CHECK-64-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 // CHECK-64-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK-64: body: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-64: then: -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] -// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK-64-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK-64-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 // CHECK-64-NEXT: br label [[IFCONT:%.*]] // CHECK-64: else: // CHECK-64-NEXT: br label [[IFCONT]] // CHECK-64: ifcont: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] // CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-64: then2: -// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK-64-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* -// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] -// CHECK-64-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4 -// CHECK-64-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 +// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 // CHECK-64-NEXT: br label [[IFCONT4:%.*]] // CHECK-64: else3: // CHECK-64-NEXT: br label [[IFCONT4]] // CHECK-64: ifcont4: // CHECK-64-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 // CHECK-64-NEXT: br label [[PRECOND]] // CHECK-64: exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK-64-SAME: (i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK-64-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 -// CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i64 2) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[C1:%.*]] = alloca i8, align 1 // CHECK-64-NEXT: [[D2:%.*]] = alloca float, align 4 -// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 -// CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK-64-NEXT: store i8 0, i8* [[C1]], align 1 -// CHECK-64-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-64-NEXT: store i8 0, ptr [[C1]], align 1 +// CHECK-64-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 // CHECK-64-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 // CHECK-64-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 // CHECK-64-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK-64-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK-64-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 +// CHECK-64-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 +// CHECK-64-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 // CHECK-64-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK-64-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: store i8* [[C1]], i8** [[TMP6]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* -// CHECK-64-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-64-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i64 16, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func1, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func2) +// CHECK-64-NEXT: store float [[MUL]], ptr [[D2]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: store ptr [[C1]], ptr [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: store ptr [[D2]], ptr [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2) // CHECK-64-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 // CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-64: .omp.reduction.then: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP0]], align 1 // CHECK-64-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i8, ptr [[C1]], align 1 // CHECK-64-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 // CHECK-64-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK-64-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK-64-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK-64-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 +// CHECK-64-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load float, ptr [[D2]], align 4 // CHECK-64-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] -// CHECK-64-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK-64-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-64: .omp.reduction.done: @@ -377,55 +356,49 @@ int bar(int n){ // // // CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1 -// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-64-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-64-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 // CHECK-64-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 // CHECK-64-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-64-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 // CHECK-64-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) // CHECK-64-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK-64-NEXT: store i8 [[TMP18]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 -// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK-64-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 8 -// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** -// CHECK-64-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 8 -// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i64 1 -// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* -// CHECK-64-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK-64-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-64-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i64 1 +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 // CHECK-64-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-64-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK-64-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK-64-NEXT: store i32 [[TMP32]], i32* [[TMP28]], align 4 -// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i64 1 -// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i64 1 -// CHECK-64-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK-64-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 8 +// CHECK-64-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i64 1 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 8 // CHECK-64-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-64-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-64-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -440,9 +413,7 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] // CHECK-64-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-64: then: -// CHECK-64-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK-64-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP48]], i8* [[TMP49]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]] // CHECK-64-NEXT: br label [[IFCONT:%.*]] // CHECK-64: else: // CHECK-64-NEXT: br label [[IFCONT]] @@ -452,20 +423,18 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] // CHECK-64-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK-64: then5: -// CHECK-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 8 -// CHECK-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP56:%.*]] = load i8*, i8** [[TMP55]], align 8 -// CHECK-64-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 -// CHECK-64-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 -// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** -// CHECK-64-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 8 -// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** -// CHECK-64-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 8 -// CHECK-64-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK-64-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 +// CHECK-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 8 +// CHECK-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 8 +// CHECK-64-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 +// CHECK-64-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 +// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 8 +// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 +// CHECK-64-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 +// CHECK-64-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 // CHECK-64-NEXT: br label [[IFCONT7:%.*]] // CHECK-64: else6: // CHECK-64-NEXT: br label [[IFCONT7]] @@ -474,74 +443,69 @@ int bar(int n){ // // // CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2 -// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-64: then: -// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(3)* [[TMP10]] to i8 addrspace(3)* -// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK-64-NEXT: store volatile i8 [[TMP12]], i8 addrspace(3)* [[TMP11]], align 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK-64-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 // CHECK-64-NEXT: br label [[IFCONT:%.*]] // CHECK-64: else: // CHECK-64-NEXT: br label [[IFCONT]] // CHECK-64: ifcont: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] // CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-64: then2: -// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(3)* [[TMP14]] to i8 addrspace(3)* -// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK-64-NEXT: [[TMP18:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP15]], align 1 -// CHECK-64-NEXT: store i8 [[TMP18]], i8* [[TMP17]], align 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 +// CHECK-64-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 // CHECK-64-NEXT: br label [[IFCONT4:%.*]] // CHECK-64: else3: // CHECK-64-NEXT: br label [[IFCONT4]] // CHECK-64: ifcont4: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-64-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK-64: then6: -// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 8 -// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i32* -// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK-64-NEXT: store volatile i32 [[TMP23]], i32 addrspace(3)* [[TMP22]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK-64-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 // CHECK-64-NEXT: br label [[IFCONT8:%.*]] // CHECK-64: else7: // CHECK-64-NEXT: br label [[IFCONT8]] // CHECK-64: ifcont8: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] // CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK-64: then10: -// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP26]], align 8 -// CHECK-64-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to i32* -// CHECK-64-NEXT: [[TMP29:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP25]], align 4 -// CHECK-64-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 +// CHECK-64-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 +// CHECK-64-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 // CHECK-64-NEXT: br label [[IFCONT12:%.*]] // CHECK-64: else11: // CHECK-64-NEXT: br label [[IFCONT12]] @@ -550,101 +514,95 @@ int bar(int n){ // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35 -// CHECK-64-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { +// CHECK-64-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-64-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i64 2) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK-64-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[A1]], align 4 +// CHECK-64-NEXT: store i16 -32768, ptr [[B2]], align 2 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 // CHECK-64-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK-64-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: store i32 [[OR]], ptr [[A1]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: // CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] // CHECK-64-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK-64-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-64-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-64-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 +// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: store ptr [[A1]], ptr [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: store ptr [[B2]], ptr [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4) // CHECK-64-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 // CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-64: .omp.reduction.then: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 // CHECK-64-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK-64-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-64-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK-64-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-64-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 // CHECK-64-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] // CHECK-64-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] // CHECK-64: cond.true9: -// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK-64-NEXT: br label [[COND_END11:%.*]] // CHECK-64: cond.false10: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-64-NEXT: br label [[COND_END11]] // CHECK-64: cond.end11: // CHECK-64-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK-64-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK-64-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 // CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) // CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-64: .omp.reduction.done: @@ -652,56 +610,49 @@ int bar(int n){ // // // CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 -// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-64-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-64-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 // CHECK-64-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-64-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK-64-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK-64-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 -// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK-64-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** -// CHECK-64-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 8 -// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK-64-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK-64-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 // CHECK-64-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK-64-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-64-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK-64-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK-64-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK-64-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK-64-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK-64-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 8 +// CHECK-64-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 +// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 8 // CHECK-64-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-64-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-64-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -716,9 +667,7 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] // CHECK-64-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-64: then: -// CHECK-64-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK-64-NEXT: [[TMP50:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP49]], i8* [[TMP50]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]] // CHECK-64-NEXT: br label [[IFCONT:%.*]] // CHECK-64: else: // CHECK-64-NEXT: br label [[IFCONT]] @@ -728,22 +677,18 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] // CHECK-64-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK-64: then5: -// CHECK-64-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** -// CHECK-64-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 8 -// CHECK-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** -// CHECK-64-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 8 -// CHECK-64-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK-64-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 -// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** -// CHECK-64-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 8 -// CHECK-64-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** -// CHECK-64-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 8 -// CHECK-64-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK-64-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 +// CHECK-64-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 8 +// CHECK-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 8 +// CHECK-64-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 +// CHECK-64-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 +// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 +// CHECK-64-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 8 +// CHECK-64-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 +// CHECK-64-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 // CHECK-64-NEXT: br label [[IFCONT7:%.*]] // CHECK-64: else6: // CHECK-64-NEXT: br label [[IFCONT7]] @@ -752,76 +697,69 @@ int bar(int n){ // // // CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 -// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-64: then: -// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK-64-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-64-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 // CHECK-64-NEXT: br label [[IFCONT:%.*]] // CHECK-64: else: // CHECK-64-NEXT: br label [[IFCONT]] // CHECK-64: ifcont: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] // CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-64: then2: -// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK-64-NEXT: [[TMP18:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK-64-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 +// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 // CHECK-64-NEXT: br label [[IFCONT4:%.*]] // CHECK-64: else3: // CHECK-64-NEXT: br label [[IFCONT4]] // CHECK-64: ifcont4: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-64-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK-64: then6: -// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 8 -// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i16* -// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i32 addrspace(3)* [[TMP22]] to i16 addrspace(3)* -// CHECK-64-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP21]], align 2 -// CHECK-64-NEXT: store volatile i16 [[TMP24]], i16 addrspace(3)* [[TMP23]], align 2 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 +// CHECK-64-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 // CHECK-64-NEXT: br label [[IFCONT8:%.*]] // CHECK-64: else7: // CHECK-64-NEXT: br label [[IFCONT8]] // CHECK-64: ifcont8: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-64-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] // CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK-64: then10: -// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-64-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(3)* [[TMP26]] to i16 addrspace(3)* -// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK-64-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i16* -// CHECK-64-NEXT: [[TMP31:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP27]], align 2 -// CHECK-64-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 2 +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 +// CHECK-64-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 +// CHECK-64-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 // CHECK-64-NEXT: br label [[IFCONT12:%.*]] // CHECK-64: else11: // CHECK-64-NEXT: br label [[IFCONT12]] @@ -830,58 +768,54 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24 -// CHECK-32-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* -// CHECK-32-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[E1:%.*]] = alloca double, align 8 -// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK-32-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 +// CHECK-32-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 +// CHECK-32-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 // CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK-32-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-32-NEXT: store double [[ADD]], ptr [[E1]], align 8 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) // CHECK-32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 // CHECK-32-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32: .omp.reduction.then: -// CHECK-32-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK-32-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP0]], align 8 +// CHECK-32-NEXT: [[TMP10:%.*]] = load double, ptr [[E1]], align 8 // CHECK-32-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] -// CHECK-32-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK-32-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32: .omp.reduction.done: @@ -889,40 +823,34 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-32-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-32-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** -// CHECK-32-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i32 1 -// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* -// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK-32-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 // CHECK-32-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 // CHECK-32-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK-32-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 8 -// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 -// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 -// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK-32-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 4 +// CHECK-32-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 // CHECK-32-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-32-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-32-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -937,9 +865,7 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] // CHECK-32-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32: then: -// CHECK-32-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK-32-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]] +// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]] // CHECK-32-NEXT: br label [[IFCONT:%.*]] // CHECK-32: else: // CHECK-32-NEXT: br label [[IFCONT]] @@ -949,14 +875,12 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] // CHECK-32-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK-32: then4: -// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** -// CHECK-32-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 4 -// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** -// CHECK-32-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 4 -// CHECK-32-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 -// CHECK-32-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 +// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4 +// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4 +// CHECK-32-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 +// CHECK-32-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 // CHECK-32-NEXT: br label [[IFCONT6:%.*]] // CHECK-32: else5: // CHECK-32-NEXT: br label [[IFCONT6]] @@ -965,143 +889,136 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* -// CHECK-32-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK-32-NEXT: br label [[PRECOND:%.*]] // CHECK-32: precond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 // CHECK-32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 // CHECK-32-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK-32: body: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32: then: -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* -// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] -// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK-32-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK-32-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 // CHECK-32-NEXT: br label [[IFCONT:%.*]] // CHECK-32: else: // CHECK-32-NEXT: br label [[IFCONT]] // CHECK-32: ifcont: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] // CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-32: then2: -// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 4 -// CHECK-32-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* -// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] -// CHECK-32-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4 -// CHECK-32-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 +// CHECK-32-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 // CHECK-32-NEXT: br label [[IFCONT4:%.*]] // CHECK-32: else3: // CHECK-32-NEXT: br label [[IFCONT4]] // CHECK-32: ifcont4: // CHECK-32-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 // CHECK-32-NEXT: br label [[PRECOND]] // CHECK-32: exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK-32-SAME: (i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK-32-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i32 2) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[C1:%.*]] = alloca i8, align 1 // CHECK-32-NEXT: [[D2:%.*]] = alloca float, align 4 -// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-NEXT: store i8 0, i8* [[C1]], align 1 -// CHECK-32-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK-32-NEXT: store i8 0, ptr [[C1]], align 1 +// CHECK-32-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 // CHECK-32-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 // CHECK-32-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 // CHECK-32-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK-32-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK-32-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 +// CHECK-32-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 // CHECK-32-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK-32-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* -// CHECK-32-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func1, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func2) +// CHECK-32-NEXT: store float [[MUL]], ptr [[D2]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2) // CHECK-32-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 // CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32: .omp.reduction.then: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP0]], align 1 // CHECK-32-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i8, ptr [[C1]], align 1 // CHECK-32-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 // CHECK-32-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK-32-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK-32-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK-32-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load float, ptr [[D2]], align 4 // CHECK-32-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK-32-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32: .omp.reduction.done: @@ -1109,55 +1026,49 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1 -// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-32-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-32-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 // CHECK-32-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 // CHECK-32-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 // CHECK-32-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) // CHECK-32-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK-32-NEXT: store i8 [[TMP18]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK-32-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** -// CHECK-32-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 4 -// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i32 1 -// CHECK-32-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* -// CHECK-32-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK-32-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-32-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1 +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 // CHECK-32-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK-32-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK-32-NEXT: store i32 [[TMP32]], i32* [[TMP28]], align 4 -// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 -// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i32 1 -// CHECK-32-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK-32-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 4 +// CHECK-32-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4 // CHECK-32-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-32-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-32-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1172,9 +1083,7 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] // CHECK-32-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32: then: -// CHECK-32-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK-32-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP48]], i8* [[TMP49]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]] // CHECK-32-NEXT: br label [[IFCONT:%.*]] // CHECK-32: else: // CHECK-32-NEXT: br label [[IFCONT]] @@ -1184,20 +1093,18 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] // CHECK-32-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK-32: then5: -// CHECK-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 -// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP56:%.*]] = load i8*, i8** [[TMP55]], align 4 -// CHECK-32-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 -// CHECK-32-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 -// CHECK-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** -// CHECK-32-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 4 -// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** -// CHECK-32-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 4 -// CHECK-32-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK-32-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 +// CHECK-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4 +// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4 +// CHECK-32-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 +// CHECK-32-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 +// CHECK-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4 +// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 +// CHECK-32-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 +// CHECK-32-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 // CHECK-32-NEXT: br label [[IFCONT7:%.*]] // CHECK-32: else6: // CHECK-32-NEXT: br label [[IFCONT7]] @@ -1206,74 +1113,69 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2 -// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32: then: -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(3)* [[TMP10]] to i8 addrspace(3)* -// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK-32-NEXT: store volatile i8 [[TMP12]], i8 addrspace(3)* [[TMP11]], align 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK-32-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 // CHECK-32-NEXT: br label [[IFCONT:%.*]] // CHECK-32: else: // CHECK-32-NEXT: br label [[IFCONT]] // CHECK-32: ifcont: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] // CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-32: then2: -// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(3)* [[TMP14]] to i8 addrspace(3)* -// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP15]], align 1 -// CHECK-32-NEXT: store i8 [[TMP18]], i8* [[TMP17]], align 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 +// CHECK-32-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 // CHECK-32-NEXT: br label [[IFCONT4:%.*]] // CHECK-32: else3: // CHECK-32-NEXT: br label [[IFCONT4]] // CHECK-32: ifcont4: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK-32: then6: -// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i32* -// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK-32-NEXT: store volatile i32 [[TMP23]], i32 addrspace(3)* [[TMP22]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK-32-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 // CHECK-32-NEXT: br label [[IFCONT8:%.*]] // CHECK-32: else7: // CHECK-32-NEXT: br label [[IFCONT8]] // CHECK-32: ifcont8: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] // CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK-32: then10: -// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP26]], align 4 -// CHECK-32-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to i32* -// CHECK-32-NEXT: [[TMP29:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP25]], align 4 -// CHECK-32-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 +// CHECK-32-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 // CHECK-32-NEXT: br label [[IFCONT12:%.*]] // CHECK-32: else11: // CHECK-32-NEXT: br label [[IFCONT12]] @@ -1282,101 +1184,95 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35 -// CHECK-32-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-32-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK-32-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[A1]], align 4 +// CHECK-32-NEXT: store i16 -32768, ptr [[B2]], align 2 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 // CHECK-32-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK-32-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: store i32 [[OR]], ptr [[A1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: // CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] // CHECK-32-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK-32-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-32-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 +// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4) // CHECK-32-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 // CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32: .omp.reduction.then: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 // CHECK-32-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK-32-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 // CHECK-32-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] // CHECK-32-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] // CHECK-32: cond.true9: -// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK-32-NEXT: br label [[COND_END11:%.*]] // CHECK-32: cond.false10: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-NEXT: br label [[COND_END11]] // CHECK-32: cond.end11: // CHECK-32-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK-32-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK-32-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 // CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) // CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32: .omp.reduction.done: @@ -1384,56 +1280,49 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 -// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-32-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-32-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 -// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 // CHECK-32-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK-32-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK-32-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 -// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK-32-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** -// CHECK-32-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 -// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK-32-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK-32-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 +// CHECK-32-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 // CHECK-32-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK-32-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK-32-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK-32-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK-32-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK-32-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK-32-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 +// CHECK-32-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 +// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 // CHECK-32-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-32-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-32-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1448,9 +1337,7 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] // CHECK-32-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32: then: -// CHECK-32-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK-32-NEXT: [[TMP50:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP49]], i8* [[TMP50]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]] // CHECK-32-NEXT: br label [[IFCONT:%.*]] // CHECK-32: else: // CHECK-32-NEXT: br label [[IFCONT]] @@ -1460,22 +1347,18 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] // CHECK-32-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK-32: then5: -// CHECK-32-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** -// CHECK-32-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 -// CHECK-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** -// CHECK-32-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 -// CHECK-32-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK-32-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 -// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** -// CHECK-32-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 -// CHECK-32-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** -// CHECK-32-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 -// CHECK-32-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK-32-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 +// CHECK-32-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 +// CHECK-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 +// CHECK-32-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 +// CHECK-32-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 +// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 +// CHECK-32-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 +// CHECK-32-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 +// CHECK-32-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 // CHECK-32-NEXT: br label [[IFCONT7:%.*]] // CHECK-32: else6: // CHECK-32-NEXT: br label [[IFCONT7]] @@ -1484,76 +1367,69 @@ int bar(int n){ // // // CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 -// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32: then: -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK-32-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-32-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 // CHECK-32-NEXT: br label [[IFCONT:%.*]] // CHECK-32: else: // CHECK-32-NEXT: br label [[IFCONT]] // CHECK-32: ifcont: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] // CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-32: then2: -// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK-32-NEXT: [[TMP18:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK-32-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 +// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 // CHECK-32-NEXT: br label [[IFCONT4:%.*]] // CHECK-32: else3: // CHECK-32-NEXT: br label [[IFCONT4]] // CHECK-32: ifcont4: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK-32: then6: -// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i16* -// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast i32 addrspace(3)* [[TMP22]] to i16 addrspace(3)* -// CHECK-32-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP21]], align 2 -// CHECK-32-NEXT: store volatile i16 [[TMP24]], i16 addrspace(3)* [[TMP23]], align 2 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 +// CHECK-32-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 // CHECK-32-NEXT: br label [[IFCONT8:%.*]] // CHECK-32: else7: // CHECK-32-NEXT: br label [[IFCONT8]] // CHECK-32: ifcont8: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] // CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK-32: then10: -// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(3)* [[TMP26]] to i16 addrspace(3)* -// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP28]], align 4 -// CHECK-32-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i16* -// CHECK-32-NEXT: [[TMP31:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP27]], align 2 -// CHECK-32-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 2 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 +// CHECK-32-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 // CHECK-32-NEXT: br label [[IFCONT12:%.*]] // CHECK-32: else11: // CHECK-32-NEXT: br label [[IFCONT12]] @@ -1562,58 +1438,54 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24 -// CHECK-32-EX-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-EX-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[E1:%.*]] = alloca double, align 8 -// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 // CHECK-32-EX-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK-32-EX-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-32-EX-NEXT: store double [[ADD]], ptr [[E1]], align 8 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) // CHECK-32-EX-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32-EX: .omp.reduction.then: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP0]], align 8 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load double, ptr [[E1]], align 8 // CHECK-32-EX-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] -// CHECK-32-EX-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK-32-EX-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32-EX: .omp.reduction.done: @@ -1621,40 +1493,34 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i32 1 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 // CHECK-32-EX-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-EX-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 // CHECK-32-EX-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK-32-EX-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 8 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 4 +// CHECK-32-EX-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 // CHECK-32-EX-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-32-EX-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1669,9 +1535,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] // CHECK-32-EX-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32-EX: then: -// CHECK-32-EX-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK-32-EX-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]] +// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]] // CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] // CHECK-32-EX: else: // CHECK-32-EX-NEXT: br label [[IFCONT]] @@ -1681,14 +1545,12 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] // CHECK-32-EX-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK-32-EX: then4: -// CHECK-32-EX-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** -// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 4 -// CHECK-32-EX-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** -// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 4 -// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 -// CHECK-32-EX-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 +// CHECK-32-EX-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4 +// CHECK-32-EX-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4 +// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 +// CHECK-32-EX-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 // CHECK-32-EX-NEXT: br label [[IFCONT6:%.*]] // CHECK-32-EX: else5: // CHECK-32-EX-NEXT: br label [[IFCONT6]] @@ -1697,143 +1559,136 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK-32-EX-NEXT: br label [[PRECOND:%.*]] // CHECK-32-EX: precond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 // CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK-32-EX: body: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32-EX: then: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK-32-EX-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK-32-EX-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 // CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] // CHECK-32-EX: else: // CHECK-32-EX-NEXT: br label [[IFCONT]] // CHECK-32-EX: ifcont: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] // CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-32-EX: then2: -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 4 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 // CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]] // CHECK-32-EX: else3: // CHECK-32-EX-NEXT: br label [[IFCONT4]] // CHECK-32-EX: ifcont4: // CHECK-32-EX-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-32-EX-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 // CHECK-32-EX-NEXT: br label [[PRECOND]] // CHECK-32-EX: exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK-32-EX-SAME: (i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i32 2) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[C1:%.*]] = alloca i8, align 1 // CHECK-32-EX-NEXT: [[D2:%.*]] = alloca float, align 4 -// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i8 0, i8* [[C1]], align 1 -// CHECK-32-EX-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i8 0, ptr [[C1]], align 1 +// CHECK-32-EX-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 // CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 // CHECK-32-EX-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 // CHECK-32-EX-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK-32-EX-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-EX-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 // CHECK-32-EX-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK-32-EX-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func1, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func2) +// CHECK-32-EX-NEXT: store float [[MUL]], ptr [[D2]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2) // CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32-EX: .omp.reduction.then: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP0]], align 1 // CHECK-32-EX-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i8, ptr [[C1]], align 1 // CHECK-32-EX-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 // CHECK-32-EX-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK-32-EX-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-EX-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load float, ptr [[D2]], align 4 // CHECK-32-EX-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32-EX: .omp.reduction.done: @@ -1841,55 +1696,49 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1 -// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 // CHECK-32-EX-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 // CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-EX-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 // CHECK-32-EX-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) // CHECK-32-EX-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK-32-EX-NEXT: store i8 [[TMP18]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK-32-EX-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 4 -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i32 1 -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-32-EX-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 // CHECK-32-EX-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-EX-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK-32-EX-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK-32-EX-NEXT: store i32 [[TMP32]], i32* [[TMP28]], align 4 -// CHECK-32-EX-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 -// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i32 1 -// CHECK-32-EX-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4 // CHECK-32-EX-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-32-EX-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1904,9 +1753,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] // CHECK-32-EX-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32-EX: then: -// CHECK-32-EX-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK-32-EX-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP48]], i8* [[TMP49]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]] // CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] // CHECK-32-EX: else: // CHECK-32-EX-NEXT: br label [[IFCONT]] @@ -1916,20 +1763,18 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] // CHECK-32-EX-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK-32-EX: then5: -// CHECK-32-EX-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 -// CHECK-32-EX-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP56:%.*]] = load i8*, i8** [[TMP55]], align 4 -// CHECK-32-EX-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 -// CHECK-32-EX-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 -// CHECK-32-EX-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** -// CHECK-32-EX-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 4 -// CHECK-32-EX-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** -// CHECK-32-EX-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 4 -// CHECK-32-EX-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK-32-EX-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 +// CHECK-32-EX-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4 +// CHECK-32-EX-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4 +// CHECK-32-EX-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 +// CHECK-32-EX-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 +// CHECK-32-EX-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4 +// CHECK-32-EX-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 +// CHECK-32-EX-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 +// CHECK-32-EX-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 // CHECK-32-EX-NEXT: br label [[IFCONT7:%.*]] // CHECK-32-EX: else6: // CHECK-32-EX-NEXT: br label [[IFCONT7]] @@ -1938,74 +1783,69 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2 -// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32-EX: then: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(3)* [[TMP10]] to i8 addrspace(3)* -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK-32-EX-NEXT: store volatile i8 [[TMP12]], i8 addrspace(3)* [[TMP11]], align 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK-32-EX-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 // CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] // CHECK-32-EX: else: // CHECK-32-EX-NEXT: br label [[IFCONT]] // CHECK-32-EX: ifcont: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-32-EX: then2: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(3)* [[TMP14]] to i8 addrspace(3)* -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP15]], align 1 -// CHECK-32-EX-NEXT: store i8 [[TMP18]], i8* [[TMP17]], align 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 +// CHECK-32-EX-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 // CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]] // CHECK-32-EX: else3: // CHECK-32-EX-NEXT: br label [[IFCONT4]] // CHECK-32-EX: ifcont4: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-EX-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK-32-EX: then6: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i32* -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK-32-EX-NEXT: store volatile i32 [[TMP23]], i32 addrspace(3)* [[TMP22]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK-32-EX-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 // CHECK-32-EX-NEXT: br label [[IFCONT8:%.*]] // CHECK-32-EX: else7: // CHECK-32-EX-NEXT: br label [[IFCONT8]] // CHECK-32-EX: ifcont8: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] // CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK-32-EX: then10: -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP26]], align 4 -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to i32* -// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP25]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 // CHECK-32-EX-NEXT: br label [[IFCONT12:%.*]] // CHECK-32-EX: else11: // CHECK-32-EX-NEXT: br label [[IFCONT12]] @@ -2014,101 +1854,95 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35 -// CHECK-32-EX-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK-32-EX-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[A1]], align 4 +// CHECK-32-EX-NEXT: store i16 -32768, ptr [[B2]], align 2 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 // CHECK-32-EX-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK-32-EX-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: store i32 [[OR]], ptr [[A1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-EX-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: // CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK-32-EX-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-32-EX-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4) // CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32-EX: .omp.reduction.then: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 // CHECK-32-EX-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-EX-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK-32-EX-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-EX-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 // CHECK-32-EX-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] // CHECK-32-EX-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] // CHECK-32-EX: cond.true9: -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK-32-EX-NEXT: br label [[COND_END11:%.*]] // CHECK-32-EX: cond.false10: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 // CHECK-32-EX-NEXT: br label [[COND_END11]] // CHECK-32-EX: cond.end11: // CHECK-32-EX-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK-32-EX-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 // CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) // CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32-EX: .omp.reduction.done: @@ -2116,56 +1950,49 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 -// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 // CHECK-32-EX-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-EX-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 // CHECK-32-EX-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK-32-EX-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 // CHECK-32-EX-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 // CHECK-32-EX-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() // CHECK-32-EX-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 // CHECK-32-EX-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) // CHECK-32-EX-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK-32-EX-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK-32-EX-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK-32-EX-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 // CHECK-32-EX-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK-32-EX-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -2180,9 +2007,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] // CHECK-32-EX-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32-EX: then: -// CHECK-32-EX-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK-32-EX-NEXT: [[TMP50:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(i8* [[TMP49]], i8* [[TMP50]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]] // CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] // CHECK-32-EX: else: // CHECK-32-EX-NEXT: br label [[IFCONT]] @@ -2192,22 +2017,18 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] // CHECK-32-EX-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK-32-EX: then5: -// CHECK-32-EX-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** -// CHECK-32-EX-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 -// CHECK-32-EX-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** -// CHECK-32-EX-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 -// CHECK-32-EX-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 -// CHECK-32-EX-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** -// CHECK-32-EX-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 -// CHECK-32-EX-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** -// CHECK-32-EX-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 -// CHECK-32-EX-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 +// CHECK-32-EX-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 +// CHECK-32-EX-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 +// CHECK-32-EX-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 +// CHECK-32-EX-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 +// CHECK-32-EX-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 +// CHECK-32-EX-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 // CHECK-32-EX-NEXT: br label [[IFCONT7:%.*]] // CHECK-32-EX: else6: // CHECK-32-EX-NEXT: br label [[IFCONT7]] @@ -2216,76 +2037,69 @@ int bar(int n){ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 -// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK-32-EX: then: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK-32-EX-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 // CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] // CHECK-32-EX: else: // CHECK-32-EX-NEXT: br label [[IFCONT]] // CHECK-32-EX: ifcont: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK-32-EX: then2: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 // CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]] // CHECK-32-EX: else3: // CHECK-32-EX-NEXT: br label [[IFCONT4]] // CHECK-32-EX: ifcont4: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK-32-EX-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK-32-EX: then6: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i16* -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = bitcast i32 addrspace(3)* [[TMP22]] to i16 addrspace(3)* -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP21]], align 2 -// CHECK-32-EX-NEXT: store volatile i16 [[TMP24]], i16 addrspace(3)* [[TMP23]], align 2 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 +// CHECK-32-EX-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 // CHECK-32-EX-NEXT: br label [[IFCONT8:%.*]] // CHECK-32-EX: else7: // CHECK-32-EX-NEXT: br label [[IFCONT8]] // CHECK-32-EX: ifcont8: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] // CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK-32-EX: then10: -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(3)* [[TMP26]] to i16 addrspace(3)* -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP28]], align 4 -// CHECK-32-EX-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i16* -// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP27]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 2 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 // CHECK-32-EX-NEXT: br label [[IFCONT12:%.*]] // CHECK-32-EX: else11: // CHECK-32-EX-NEXT: br label [[IFCONT12]] diff --git a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp index 1db4f26..f3027d7 100644 --- a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp +++ b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp @@ -1,8 +1,8 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s // expected-no-diagnostics // CHECK-DAG: [[T:%.+]] = type {{.+}}, {{fp128|ppc_fp128}}, @@ -34,33 +34,32 @@ struct T1 { #pragma omp declare target T a = T(); T f = a; -// CHECK: define{{ hidden | }}void @{{.+}}foo{{.+}}([[T]]* noundef byval([[T]]) align {{.+}}) +// CHECK: define{{ hidden | }}void @{{.+}}foo{{.+}}(ptr noundef byval([[T]]) align {{.+}}) void foo(T a = T()) { return; } // CHECK: define{{ hidden | }}[6 x i64] @{{.+}}bar{{.+}}() T bar() { -// CHECK: bitcast [[T]]* %{{.+}} to [6 x i64]* -// CHECK-NEXT: load [6 x i64], [6 x i64]* %{{.+}}, +// CHECK: [[RETVAL:%.+]] = alloca [[T]] +// CHECK: load [6 x i64], ptr [[RETVAL]] // CHECK-NEXT: ret [6 x i64] return T(); } // CHECK: define{{ hidden | }}void @{{.+}}baz{{.+}}() void baz() { // CHECK: call [6 x i64] @{{.+}}bar{{.+}}() -// CHECK-NEXT: bitcast [[T]]* %{{.+}} to [6 x i64]* -// CHECK-NEXT: store [6 x i64] %{{.+}}, [6 x i64]* %{{.+}}, +// CHECK-NEXT: store [6 x i64] %{{.+}}, ptr %{{.+}}, T t = bar(); } T1 a1 = T1(); T1 f1 = a1; -// CHECK: define{{ hidden | }}void @{{.+}}foo1{{.+}}([[T1]]* noundef byval([[T1]]) align {{.+}}) +// CHECK: define{{ hidden | }}void @{{.+}}foo1{{.+}}(ptr noundef byval([[T1]]) align {{.+}}) void foo1(T1 a = T1()) { return; } // CHECK: define{{ hidden | }}[[T1]] @{{.+}}bar1{{.+}}() T1 bar1() { -// CHECK: load [[T1]], [[T1]]* +// CHECK: load [[T1]], ptr // CHECK-NEXT: ret [[T1]] return T1(); }