From ae1cf4577cab387658e4f5677e568adeb2dd4b9d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 4 Jan 2023 16:46:03 +0100 Subject: [PATCH] [OpenMP] Convert some tests to opaque pointers (NFC) --- llvm/test/Transforms/OpenMP/barrier_removal.ll | 156 +++---- .../Transforms/OpenMP/hide_mem_transfer_latency.ll | 464 +++++++++------------ .../OpenMP/parallel_deletion_cg_update.ll | 35 +- 3 files changed, 285 insertions(+), 370 deletions(-) diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll index 43d8518..a6d67d8 100644 --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -14,13 +14,13 @@ declare void @llvm.assume(i1) ;. ; CHECK: @[[GC1:[a-zA-Z0-9_$"\\.-]+]] = constant i32 42 ; CHECK: @[[GC2:[a-zA-Z0-9_$"\\.-]+]] = addrspace(4) global i32 0 -; CHECK: @[[GPTR4:[a-zA-Z0-9_$"\\.-]+]] = addrspace(4) global i32 addrspace(4)* null +; CHECK: @[[GPTR4:[a-zA-Z0-9_$"\\.-]+]] = addrspace(4) global ptr addrspace(4) null ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i32 42 ; CHECK: @[[GS:[a-zA-Z0-9_$"\\.-]+]] = addrspace(3) global i32 0 -; CHECK: @[[GPTR:[a-zA-Z0-9_$"\\.-]+]] = global i32* null +; CHECK: @[[GPTR:[a-zA-Z0-9_$"\\.-]+]] = global ptr null ; CHECK: @[[PG1:[a-zA-Z0-9_$"\\.-]+]] = thread_local global i32 42 ; CHECK: @[[PG2:[a-zA-Z0-9_$"\\.-]+]] = addrspace(5) global i32 0 -; CHECK: @[[GPTR5:[a-zA-Z0-9_$"\\.-]+]] = global i32 addrspace(5)* null +; CHECK: @[[GPTR5:[a-zA-Z0-9_$"\\.-]+]] = global ptr addrspace(5) null ; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32 42 ; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global i32 0 ;. @@ -95,28 +95,28 @@ define void @neg_empty_2() { @GC1 = constant i32 42 @GC2 = addrspace(4) global i32 0 -@GPtr4 = addrspace(4) global i32 addrspace(4)* null +@GPtr4 = addrspace(4) global ptr addrspace(4) null define void @pos_constant_loads() { ; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads() { -; CHECK-NEXT: [[ARG:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)** addrspacecast (i32 addrspace(4)* addrspace(4)* @GPtr4 to i32 addrspace(4)**), align 8 -; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @GC2 to i32*), align 4 -; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast i32 addrspace(4)* [[ARG]] to i32* -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[ARGC]], align 4 +; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspacecast (ptr addrspace(4) @GPtr4 to ptr), align 8 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @GC2 to ptr), align 4 +; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr +; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARGC]], align 4 ; CHECK-NEXT: call void @aligned_barrier() ; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]] ; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]] ; CHECK-NEXT: call void @useI32(i32 [[E]]) ; CHECK-NEXT: ret void ; - %GPtr4c = addrspacecast i32 addrspace(4)*addrspace(4)* @GPtr4 to i32 addrspace(4)** - %arg = load i32 addrspace(4)*, i32 addrspace(4)** %GPtr4c - %a = load i32, i32* @GC1 + %GPtr4c = addrspacecast ptr addrspace(4) @GPtr4 to ptr + %arg = load ptr addrspace(4), ptr %GPtr4c + %a = load i32, ptr @GC1 call void @aligned_barrier() - %GC2c = addrspacecast i32 addrspace(4)* @GC2 to i32* - %b = load i32, i32* %GC2c + %GC2c = addrspacecast ptr addrspace(4) @GC2 to ptr + %b = load i32, ptr %GC2c call void @aligned_barrier() - %argc = addrspacecast i32 addrspace(4)* %arg to i32* - %c = load i32, i32* %argc + %argc = addrspacecast ptr addrspace(4) 
%arg to ptr + %c = load i32, ptr %argc call void @aligned_barrier() %d = add i32 %a, %b %e = add i32 %d, %c @@ -125,29 +125,29 @@ define void @pos_constant_loads() { } @G = global i32 42 @GS = addrspace(3) global i32 0 -@GPtr = global i32* null +@GPtr = global ptr null ; TODO: We could remove some of the barriers due to the lack of write effects. define void @neg_loads() { ; CHECK-LABEL: define {{[^@]+}}@neg_loads() { -; CHECK-NEXT: [[ARG:%.*]] = load i32*, i32** @GPtr, align 8 -; CHECK-NEXT: [[A:%.*]] = load i32, i32* @G, align 4 +; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 +; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G, align 4 ; CHECK-NEXT: call void @aligned_barrier() -; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @GS to i32*), align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @GS to ptr), align 4 ; CHECK-NEXT: call void @aligned_barrier() -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARG]], align 4 ; CHECK-NEXT: call void @aligned_barrier() ; CHECK-NEXT: [[D:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]] ; CHECK-NEXT: call void @useI32(i32 [[E]]) ; CHECK-NEXT: ret void ; - %arg = load i32*, i32** @GPtr - %a = load i32, i32* @G + %arg = load ptr, ptr @GPtr + %a = load i32, ptr @G call void @aligned_barrier() - %GSc = addrspacecast i32 addrspace(3)* @GS to i32* - %b = load i32, i32* %GSc + %GSc = addrspacecast ptr addrspace(3) @GS to ptr + %b = load i32, ptr %GSc call void @aligned_barrier() - %c = load i32, i32* %arg + %c = load i32, ptr %arg call void @aligned_barrier() %d = add i32 %a, %b %e = add i32 %d, %c @@ -156,34 +156,34 @@ define void @neg_loads() { } @PG1 = thread_local global i32 42 @PG2 = addrspace(5) global i32 0 -@GPtr5 = global i32 addrspace(5)* null +@GPtr5 = global ptr addrspace(5) null define void @pos_priv_mem() { ; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem() { -; CHECK-NEXT: [[ARG:%.*]] = load i32 addrspace(5)*, i32 addrspace(5)** @GPtr5, align 8 +; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 8 ; CHECK-NEXT: [[LOC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[A:%.*]] = load i32, i32* @PG1, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[LOC]], align 4 -; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(5)* @PG2 to i32*), align 4 +; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4 +; CHECK-NEXT: store i32 [[A]], ptr [[LOC]], align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(5) @PG2 to ptr), align 4 ; CHECK-NEXT: call void @aligned_barrier() -; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast i32 addrspace(5)* [[ARG]] to i32* -; CHECK-NEXT: store i32 [[B]], i32* [[ARGC]], align 4 -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[LOC]], align 4 -; CHECK-NEXT: store i32 [[V]], i32* @PG1, align 4 +; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(5) [[ARG]] to ptr +; CHECK-NEXT: store i32 [[B]], ptr [[ARGC]], align 4 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[LOC]], align 4 +; CHECK-NEXT: store i32 [[V]], ptr @PG1, align 4 ; CHECK-NEXT: ret void ; - %arg = load i32 addrspace(5)*, i32 addrspace(5)** @GPtr5 + %arg = load ptr addrspace(5), ptr @GPtr5 %loc = alloca i32 - %a = load i32, i32* @PG1 + %a = load i32, ptr @PG1 call void @aligned_barrier() - store i32 %a, i32* %loc - %PG2c = addrspacecast i32 addrspace(5)* @PG2 to i32* - %b = load i32, i32* %PG2c + store i32 %a, ptr %loc + %PG2c = addrspacecast ptr addrspace(5) @PG2 to ptr + %b = load i32, ptr %PG2c 
call void @aligned_barrier() - %argc = addrspacecast i32 addrspace(5)* %arg to i32* - store i32 %b, i32* %argc + %argc = addrspacecast ptr addrspace(5) %arg to ptr + store i32 %b, ptr %argc call void @aligned_barrier() - %v = load i32, i32* %loc - store i32 %v, i32* @PG1 + %v = load i32, ptr %loc + store i32 %v, ptr @PG1 call void @aligned_barrier() ret void } @@ -191,23 +191,23 @@ define void @pos_priv_mem() { @G2 = addrspace(1) global i32 0 define void @neg_mem() { ; CHECK-LABEL: define {{[^@]+}}@neg_mem() { -; CHECK-NEXT: [[ARG:%.*]] = load i32*, i32** @GPtr, align 8 -; CHECK-NEXT: [[A:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 +; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G1, align 4 ; CHECK-NEXT: call void @aligned_barrier() -; CHECK-NEXT: store i32 [[A]], i32* [[ARG]], align 4 +; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4 ; CHECK-NEXT: call void @aligned_barrier() -; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @G2 to i32*), align 4 -; CHECK-NEXT: store i32 [[B]], i32* @G1, align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @G2 to ptr), align 4 +; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4 ; CHECK-NEXT: ret void ; - %arg = load i32*, i32** @GPtr - %a = load i32, i32* @G1 + %arg = load ptr, ptr @GPtr + %a = load i32, ptr @G1 call void @aligned_barrier() - store i32 %a, i32* %arg + store i32 %a, ptr %arg call void @aligned_barrier() - %G2c = addrspacecast i32 addrspace(1)* @G2 to i32* - %b = load i32, i32* %G2c - store i32 %b, i32* @G1 + %G2c = addrspacecast ptr addrspace(1) @G2 to ptr + %b = load i32, ptr %G2c + store i32 %b, ptr @G1 call void @aligned_barrier() ret void } @@ -231,18 +231,18 @@ define void @pos_multiple() { !llvm.module.flags = !{!12,!13} !nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11} -!0 = !{void ()* @pos_empty_1, !"kernel", i32 1} -!1 = !{void ()* @pos_empty_2, !"kernel", i32 1} -!2 = !{void ()* @pos_empty_3, !"kernel", i32 1} -!3 = !{void ()* @pos_empty_4, !"kernel", i32 1} -!4 = !{void ()* @pos_empty_5, !"kernel", i32 1} -!5 = !{void ()* @pos_empty_6, !"kernel", i32 1} -!6 = !{void ()* @neg_empty_7, !"kernel", i32 1} -!7 = !{void ()* @pos_constant_loads, !"kernel", i32 1} -!8 = !{void ()* @neg_loads, !"kernel", i32 1} -!9 = !{void ()* @pos_priv_mem, !"kernel", i32 1} -!10 = !{void ()* @neg_mem, !"kernel", i32 1} -!11 = !{void ()* @pos_multiple, !"kernel", i32 1} +!0 = !{ptr @pos_empty_1, !"kernel", i32 1} +!1 = !{ptr @pos_empty_2, !"kernel", i32 1} +!2 = !{ptr @pos_empty_3, !"kernel", i32 1} +!3 = !{ptr @pos_empty_4, !"kernel", i32 1} +!4 = !{ptr @pos_empty_5, !"kernel", i32 1} +!5 = !{ptr @pos_empty_6, !"kernel", i32 1} +!6 = !{ptr @neg_empty_7, !"kernel", i32 1} +!7 = !{ptr @pos_constant_loads, !"kernel", i32 1} +!8 = !{ptr @neg_loads, !"kernel", i32 1} +!9 = !{ptr @pos_priv_mem, !"kernel", i32 1} +!10 = !{ptr @neg_mem, !"kernel", i32 1} +!11 = !{ptr @pos_multiple, !"kernel", i32 1} !12 = !{i32 7, !"openmp", i32 50} !13 = !{i32 7, !"openmp-device", i32 50} ;. @@ -253,16 +253,16 @@ define void @pos_multiple() { ;. 
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{void ()* @pos_empty_1, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{void ()* @pos_empty_2, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{void ()* @pos_empty_3, !"kernel", i32 1} -; CHECK: [[META5:![0-9]+]] = !{void ()* @pos_empty_4, !"kernel", i32 1} -; CHECK: [[META6:![0-9]+]] = !{void ()* @pos_empty_5, !"kernel", i32 1} -; CHECK: [[META7:![0-9]+]] = !{void ()* @pos_empty_6, !"kernel", i32 1} -; CHECK: [[META8:![0-9]+]] = !{void ()* @neg_empty_7, !"kernel", i32 1} -; CHECK: [[META9:![0-9]+]] = !{void ()* @pos_constant_loads, !"kernel", i32 1} -; CHECK: [[META10:![0-9]+]] = !{void ()* @neg_loads, !"kernel", i32 1} -; CHECK: [[META11:![0-9]+]] = !{void ()* @pos_priv_mem, !"kernel", i32 1} -; CHECK: [[META12:![0-9]+]] = !{void ()* @neg_mem, !"kernel", i32 1} -; CHECK: [[META13:![0-9]+]] = !{void ()* @pos_multiple, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1} +; CHECK: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1} +; CHECK: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1} +; CHECK: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1} +; CHECK: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1} +; CHECK: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1} +; CHECK: [[META8:![0-9]+]] = !{ptr @neg_empty_7, !"kernel", i32 1} +; CHECK: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1} +; CHECK: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1} +; CHECK: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1} +; CHECK: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1} +; CHECK: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1} ;. 
diff --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll index 1a342cc..678acd5 100644 --- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll +++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll @@ -2,10 +2,10 @@ ; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -; CHECK: %struct.__tgt_async_info = type { i8* } +; CHECK: %struct.__tgt_async_info = type { ptr } -%struct.ident_t = type { i32, i32, i32, i32, i8* } -%struct.__tgt_offload_entry = type { i8*, i8*, i64, i32, i32 } +%struct.ident_t = type { i32, i32, i32, i32, ptr } +%struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 } @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 35] @.__omp_offloading_heavyComputation1.region_id = weak constant i8 0 @@ -21,7 +21,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 @.offload_maptypes.5 = private unnamed_addr constant [1 x i64] [i64 33] -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ;double heavyComputation1() { @@ -29,7 +29,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; double random = rand(); ; ; //#pragma omp target data map(a) -; void* args[1]; +; ptr args[1]; ; args[0] = &a; ; __tgt_target_data_begin(..., args, ...) ; @@ -44,44 +44,32 @@ define dso_local double @heavyComputation1() { ; CHECK-LABEL: define {{[^@]+}}@heavyComputation1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca double, align 8 -; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8 -; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8 -; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [1 x i8*], align 8 -; CHECK-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [1 x i8*], align 8 -; CHECK-NEXT: [[HANDLE:%.*]] = alloca [[STRUCT___TGT_ASYNC_INFO:%.*]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i8* +; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [1 x ptr], align 8 +; CHECK-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [1 x ptr], align 8 ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @rand() ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[CALL]], 777 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to double -; CHECK-NEXT: store double [[CONV]], double* [[A]], align 8 +; CHECK-NEXT: store double [[CONV]], ptr [[A]], align 8 ; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 (...) 
@rand() -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @[[GLOB0:[0-9]+]], i64 -1, i32 1, i8** [[TMP1]], i8** [[TMP3]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null, %struct.__tgt_async_info* [[HANDLE]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A]] to i64* -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* [[HANDLE]]) -; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i64 0, i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS4]] to i64* -; CHECK-NEXT: store i64 [[TMP6]], i64* [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS5]], i64 0, i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS5]] to i64* -; CHECK-NEXT: store i64 [[TMP6]], i64* [[TMP10]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, i8** nonnull [[TMP7]], i8** nonnull [[TMP9]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0) -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8 +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8 +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB0:[0-9]+]], i64 -1, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null) +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: store i64 [[TMP0]], ptr [[DOTOFFLOAD_BASEPTRS4]], align 8 +; CHECK-NEXT: store i64 [[TMP0]], ptr [[DOTOFFLOAD_PTRS5]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(ptr @[[GLOB0]], i64 -1, ptr nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS4]], ptr nonnull [[DOTOFFLOAD_PTRS5]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null, i32 0, i32 0) +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]] ; CHECK: omp_offload.failed: -; CHECK-NEXT: call void @heavyComputation1FallBack(i64 [[TMP6]]) +; CHECK-NEXT: call void @heavyComputation1FallBack(i64 [[TMP0]]) ; CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] ; CHECK: omp_offload.cont: ; CHECK-NEXT: [[CONV2:%.*]] = sitofp i32 [[CALL1]] to double -; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** nonnull 
[[TMP1]], i8** nonnull [[TMP3]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null) -; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[A]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[CONV2]] +; CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[A]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], [[CONV2]] ; CHECK-NEXT: ret double [[ADD]] ; @@ -92,51 +80,41 @@ define dso_local double @heavyComputation1() { entry: %a = alloca double, align 8 - %.offload_baseptrs = alloca [1 x i8*], align 8 - %.offload_ptrs = alloca [1 x i8*], align 8 - %.offload_baseptrs4 = alloca [1 x i8*], align 8 - %.offload_ptrs5 = alloca [1 x i8*], align 8 + %.offload_baseptrs = alloca [1 x ptr], align 8 + %.offload_ptrs = alloca [1 x ptr], align 8 + %.offload_baseptrs4 = alloca [1 x ptr], align 8 + %.offload_ptrs5 = alloca [1 x ptr], align 8 - %0 = bitcast double* %a to i8* %call = tail call i32 (...) @rand() %rem = srem i32 %call, 777 %conv = sitofp i32 %rem to double - store double %conv, double* %a, align 8 + store double %conv, ptr %a, align 8 - ; FIXME: call to @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @0, ...) should be moved here. + ; FIXME: call to @__tgt_target_data_begin_mapper_issue(ptr @0, ...) should be moved here. %call1 = tail call i32 (...) @rand() - %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs, i64 0, i64 0 - %2 = bitcast [1 x i8*]* %.offload_baseptrs to double** - store double* %a, double** %2, align 8 - %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0 - %4 = bitcast [1 x i8*]* %.offload_ptrs to double** - store double* %a, double** %4, align 8 - call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null) - - %5 = bitcast double* %a to i64* - %6 = load i64, i64* %5, align 8 - %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0 - %8 = bitcast [1 x i8*]* %.offload_baseptrs4 to i64* - store i64 %6, i64* %8, align 8 - %9 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs5, i64 0, i64 0 - %10 = bitcast [1 x i8*]* %.offload_ptrs5 to i64* - store i64 %6, i64* %10, align 8 + store ptr %a, ptr %.offload_baseptrs, align 8 + store ptr %a, ptr %.offload_ptrs, align 8 + call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null) + + %0 = load i64, ptr %a, align 8 + store i64 %0, ptr %.offload_baseptrs4, align 8 + store i64 %0, ptr %.offload_ptrs5, align 8 ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here. 
- %11 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, i8** nonnull %7, i8** nonnull %9, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
- %.not = icmp eq i32 %11, 0
+ %1 = call i32 @__tgt_target_teams_mapper(ptr @0, i64 -1, ptr nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, ptr nonnull %.offload_baseptrs4, ptr nonnull %.offload_ptrs5, ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null, i32 0, i32 0)
+ %.not = icmp eq i32 %1, 0
 br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed: ; preds = %entry
- call void @heavyComputation1FallBack(i64 %6)
+ call void @heavyComputation1FallBack(i64 %0)
 br label %omp_offload.cont

omp_offload.cont: ; preds = %omp_offload.failed, %entry
 %conv2 = sitofp i32 %call1 to double
- call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null)
- %12 = load double, double* %a, align 8
- %add = fadd double %12, %conv2
+ call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr @.offload_sizes.1, ptr @.offload_maptypes, ptr null, ptr null)
+ %2 = load double, ptr %a, align 8
+ %add = fadd double %2, %conv2
 ret double %add
}

@@ -151,144 +129,118 @@ entry:
 ret void
}

-;int heavyComputation2(double* a, unsigned size) {
+;int heavyComputation2(ptr a, unsigned size) {
; int random = rand() % 7;
;
; //#pragma omp target data map(a[0:size], size)
-; void* args[2];
+; ptr args[2];
; args[0] = &a;
; args[1] = &size;
; __tgt_target_data_begin(..., args, ...)
;
; #pragma omp target teams
; for (int i = 0; i < size; ++i) {
; a[i] = ++a[i] * 3.141624;
; }
;
; return random;
;}
-define dso_local i32 @heavyComputation2(double* %a, i32 %size) {
+define dso_local i32 @heavyComputation2(ptr %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2
-; CHECK-SAME: (double* [[A:%.*]], i32 [[SIZE:%.*]]) {
+; CHECK-SAME: (ptr [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
-; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
-; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
-; CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
-; CHECK-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8
+; CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8
+; CHECK-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) 
@rand() ; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0 -; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32** -; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 1 -; CHECK-NEXT: store i64 4, i64* [[TMP10]], align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null) -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -; CHECK-NEXT: [[SIZE_CASTED:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 0 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]] to i64* -; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP13]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 0 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS3]] to i64* -; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP15]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1 -; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP17]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 1 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP19]], align 8 -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, i8** nonnull [[TMP12]], i8** nonnull [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0) -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP20]], 0 +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8 +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8 +; CHECK-NEXT: store i64 [[TMP0]], ptr [[DOTOFFLOAD_SIZES]], align 8 +; 
CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[SIZE_ADDR]], ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[SIZE_ADDR]], ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i64 0, i64 1 +; CHECK-NEXT: store i64 4, ptr [[TMP3]], align 8 +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null) +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +; CHECK-NEXT: [[SIZE_CASTED:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_BASEPTRS2]], align 8 +; CHECK-NEXT: store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_PTRS3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[A]], ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[A]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(ptr @[[GLOB0]], i64 -1, ptr nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS2]], ptr nonnull [[DOTOFFLOAD_PTRS3]], ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0) +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP7]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]] ; CHECK: omp_offload.failed: -; CHECK-NEXT: call void @heavyComputation2FallBack(i64 [[SIZE_CASTED]], double* [[A]]) +; CHECK-NEXT: call void @heavyComputation2FallBack(i64 [[SIZE_CASTED]], ptr [[A]]) ; CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] ; CHECK: omp_offload.cont: ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[CALL]], 7 -; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null) +; CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null) ; CHECK-NEXT: ret i32 [[REM]] ; entry: %size.addr = alloca i32, align 4 - %.offload_baseptrs = alloca [2 x i8*], align 8 - %.offload_ptrs = alloca [2 x i8*], align 8 + %.offload_baseptrs = alloca [2 x ptr], align 8 + %.offload_ptrs = alloca [2 x ptr], align 8 %.offload_sizes = alloca [2 x i64], align 8 - %.offload_baseptrs2 = alloca [2 x i8*], align 8 - %.offload_ptrs3 = alloca [2 x i8*], align 8 + %.offload_baseptrs2 = alloca [2 x ptr], align 8 + %.offload_ptrs3 = alloca [2 x ptr], align 8 - store i32 %size, i32* %size.addr, align 4 + store i32 %size, ptr %size.addr, align 4 %call = tail call i32 (...) 
@rand() %conv = zext i32 %size to i64 %0 = shl nuw nsw i64 %conv, 3 - %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0 - %2 = bitcast [2 x i8*]* %.offload_baseptrs to double** - store double* %a, double** %2, align 8 - %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0 - %4 = bitcast [2 x i8*]* %.offload_ptrs to double** - store double* %a, double** %4, align 8 - %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0 - store i64 %0, i64* %5, align 8 - %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1 - %7 = bitcast i8** %6 to i32** - store i32* %size.addr, i32** %7, align 8 - %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1 - %9 = bitcast i8** %8 to i32** - store i32* %size.addr, i32** %9, align 8 - %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 - store i64 4, i64* %10, align 8 - call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null) - - %11 = load i32, i32* %size.addr, align 4 - %size.casted = zext i32 %11 to i64 - %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0 - %13 = bitcast [2 x i8*]* %.offload_baseptrs2 to i64* - store i64 %size.casted, i64* %13, align 8 - %14 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 0 - %15 = bitcast [2 x i8*]* %.offload_ptrs3 to i64* - store i64 %size.casted, i64* %15, align 8 - %16 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 1 - %17 = bitcast i8** %16 to double** - store double* %a, double** %17, align 8 - %18 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 1 - %19 = bitcast i8** %18 to double** - store double* %a, double** %19, align 8 + store ptr %a, ptr %.offload_baseptrs, align 8 + store ptr %a, ptr %.offload_ptrs, align 8 + store i64 %0, ptr %.offload_sizes, align 8 + %1 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i64 0, i64 1 + store ptr %size.addr, ptr %1, align 8 + %2 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i64 0, i64 1 + store ptr %size.addr, ptr %2, align 8 + %3 = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i64 0, i64 1 + store i64 4, ptr %3, align 8 + call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null) + + %4 = load i32, ptr %size.addr, align 4 + %size.casted = zext i32 %4 to i64 + store i64 %size.casted, ptr %.offload_baseptrs2, align 8 + store i64 %size.casted, ptr %.offload_ptrs3, align 8 + %5 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs2, i64 0, i64 1 + store ptr %a, ptr %5, align 8 + %6 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs3, i64 0, i64 1 + store ptr %a, ptr %6, align 8 ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here. 
- %20 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, i8** nonnull %12, i8** nonnull %14, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
- %.not = icmp eq i32 %20, 0
+ %7 = call i32 @__tgt_target_teams_mapper(ptr @0, i64 -1, ptr nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, ptr nonnull %.offload_baseptrs2, ptr nonnull %.offload_ptrs3, ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0)
+ %.not = icmp eq i32 %7, 0
 br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed: ; preds = %entry
- call void @heavyComputation2FallBack(i64 %size.casted, double* %a)
+ call void @heavyComputation2FallBack(i64 %size.casted, ptr %a)
 br label %omp_offload.cont

omp_offload.cont: ; preds = %omp_offload.failed, %entry
 %rem = srem i32 %call, 7
- call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
+ call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null)
 ret i32 %rem
}

-define internal void @heavyComputation2FallBack(i64 %size, double* %a) {
+define internal void @heavyComputation2FallBack(i64 %size, ptr %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2FallBack
-; CHECK-SAME: (i64 [[SIZE:%.*]], double* [[A:%.*]]) {
+; CHECK-SAME: (i64 [[SIZE:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
@@ -297,145 +249,119 @@ entry:
 ret void
}

-;int heavyComputation3(double* restrict a, unsigned size) {
+;int heavyComputation3(ptr restrict a, unsigned size) {
; int random = rand() % 7;
;
; //#pragma omp target data map(a[0:size], size)
-; void* args[2];
+; ptr args[2];
; args[0] = &a;
; args[1] = &size;
; __tgt_target_data_begin(..., args, ...)
;
; #pragma omp target teams
; for (int i = 0; i < size; ++i) {
; a[i] = ++a[i] * 3.141624;
; }
;
; return random;
;}
-define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) {
+define dso_local i32 @heavyComputation3(ptr noalias %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation3
-; CHECK-SAME: (double* noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
+; CHECK-SAME: (ptr noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
-; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
; CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
-; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
-; CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
-; CHECK-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8
+; CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8
+; CHECK-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) 
@rand() ; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0 -; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32** -; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 1 -; CHECK-NEXT: store i64 4, i64* [[TMP10]], align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null) -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -; CHECK-NEXT: [[SIZE_CASTED:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 0 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]] to i64* -; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP13]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 0 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS3]] to i64* -; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP15]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1 -; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP17]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 1 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP19]], align 8 -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, i8** nonnull [[TMP12]], i8** nonnull [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0) -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP20]], 0 +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8 +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8 +; CHECK-NEXT: store i64 [[TMP0]], ptr [[DOTOFFLOAD_SIZES]], align 8 +; 
CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[SIZE_ADDR]], ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[SIZE_ADDR]], ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i64 0, i64 1 +; CHECK-NEXT: store i64 4, ptr [[TMP3]], align 8 +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null) +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +; CHECK-NEXT: [[SIZE_CASTED:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_BASEPTRS2]], align 8 +; CHECK-NEXT: store i64 [[SIZE_CASTED]], ptr [[DOTOFFLOAD_PTRS3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[A]], ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i64 0, i64 1 +; CHECK-NEXT: store ptr [[A]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(ptr @[[GLOB0]], i64 -1, ptr nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS2]], ptr nonnull [[DOTOFFLOAD_PTRS3]], ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0) +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP7]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]] ; CHECK: omp_offload.failed: -; CHECK-NEXT: call void @heavyComputation3FallBack(i64 [[SIZE_CASTED]], double* [[A]]) +; CHECK-NEXT: call void @heavyComputation3FallBack(i64 [[SIZE_CASTED]], ptr [[A]]) ; CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] ; CHECK: omp_offload.cont: ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[CALL]], 7 -; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null) +; CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 2, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.3, ptr null, ptr null) ; CHECK-NEXT: ret i32 [[REM]] ; entry: %size.addr = alloca i32, align 4 - %.offload_baseptrs = alloca [2 x i8*], align 8 - %.offload_ptrs = alloca [2 x i8*], align 8 + %.offload_baseptrs = alloca [2 x ptr], align 8 + %.offload_ptrs = alloca [2 x ptr], align 8 %.offload_sizes = alloca [2 x i64], align 8 - %.offload_baseptrs2 = alloca [2 x i8*], align 8 - %.offload_ptrs3 = alloca [2 x i8*], align 8 - store i32 %size, i32* %size.addr, align 4 + %.offload_baseptrs2 = alloca [2 x ptr], align 8 + %.offload_ptrs3 = alloca [2 x ptr], align 8 + store i32 %size, ptr %size.addr, align 4 - ; FIXME: call to @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @0, ...) should be moved here. + ; FIXME: call to @__tgt_target_data_begin_mapper_issue(ptr @0, ...) should be moved here. %call = tail call i32 (...) 
@rand() %conv = zext i32 %size to i64 %0 = shl nuw nsw i64 %conv, 3 - %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0 - %2 = bitcast [2 x i8*]* %.offload_baseptrs to double** - store double* %a, double** %2, align 8 - %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0 - %4 = bitcast [2 x i8*]* %.offload_ptrs to double** - store double* %a, double** %4, align 8 - %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0 - store i64 %0, i64* %5, align 8 - %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1 - %7 = bitcast i8** %6 to i32** - store i32* %size.addr, i32** %7, align 8 - %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1 - %9 = bitcast i8** %8 to i32** - store i32* %size.addr, i32** %9, align 8 - %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 - store i64 4, i64* %10, align 8 - call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null) - - %11 = load i32, i32* %size.addr, align 4 - %size.casted = zext i32 %11 to i64 - %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0 - %13 = bitcast [2 x i8*]* %.offload_baseptrs2 to i64* - store i64 %size.casted, i64* %13, align 8 - %14 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 0 - %15 = bitcast [2 x i8*]* %.offload_ptrs3 to i64* - store i64 %size.casted, i64* %15, align 8 - %16 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 1 - %17 = bitcast i8** %16 to double** - store double* %a, double** %17, align 8 - %18 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 1 - %19 = bitcast i8** %18 to double** - store double* %a, double** %19, align 8 + store ptr %a, ptr %.offload_baseptrs, align 8 + store ptr %a, ptr %.offload_ptrs, align 8 + store i64 %0, ptr %.offload_sizes, align 8 + %1 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i64 0, i64 1 + store ptr %size.addr, ptr %1, align 8 + %2 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i64 0, i64 1 + store ptr %size.addr, ptr %2, align 8 + %3 = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i64 0, i64 1 + store i64 4, ptr %3, align 8 + call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null) + + %4 = load i32, ptr %size.addr, align 4 + %size.casted = zext i32 %4 to i64 + store i64 %size.casted, ptr %.offload_baseptrs2, align 8 + store i64 %size.casted, ptr %.offload_ptrs3, align 8 + %5 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs2, i64 0, i64 1 + store ptr %a, ptr %5, align 8 + %6 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs3, i64 0, i64 1 + store ptr %a, ptr %6, align 8 ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here. 
- %20 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, i8** nonnull %12, i8** nonnull %14, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0) - %.not = icmp eq i32 %20, 0 + %7 = call i32 @__tgt_target_teams_mapper(ptr @0, i64 -1, ptr nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, ptr nonnull %.offload_baseptrs2, ptr nonnull %.offload_ptrs3, ptr @.offload_sizes.2, ptr @.offload_maptypes.4, ptr null, ptr null, i32 0, i32 0) + %.not = icmp eq i32 %7, 0 br i1 %.not, label %omp_offload.cont, label %omp_offload.failed omp_offload.failed: ; preds = %entry - call void @heavyComputation3FallBack(i64 %size.casted, double* %a) + call void @heavyComputation3FallBack(i64 %size.casted, ptr %a) br label %omp_offload.cont omp_offload.cont: ; preds = %omp_offload.failed, %entry %rem = srem i32 %call, 7 - call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null) + call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.3, ptr null, ptr null) ret i32 %rem } -define internal void @heavyComputation3FallBack(i64 %size, double* %a) { +define internal void @heavyComputation3FallBack(i64 %size, ptr %a) { ; CHECK-LABEL: define {{[^@]+}}@heavyComputation3FallBack -; CHECK-SAME: (i64 [[SIZE:%.*]], double* [[A:%.*]]) { +; CHECK-SAME: (i64 [[SIZE:%.*]], ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; @@ -444,12 +370,12 @@ entry: ret void } -;int dataTransferOnly1(double* restrict a, unsigned size) { +;int dataTransferOnly1(ptr restrict a, unsigned size) { ; // Random computation. ; int random = rand(); ; ; //#pragma omp target data map(to:a[0:size]) -; void* args[1]; +; ptr args[1]; ; args[0] = &a; ; __tgt_target_data_begin(..., args, ...) ; @@ -457,29 +383,24 @@ entry: ; random %= size; ; return random; ;} -define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) { +define dso_local i32 @dataTransferOnly1(ptr noalias %a, i32 %size) { ; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1 -; CHECK-SAME: (double* noalias [[A:%.*]], i32 [[SIZE:%.*]]) { +; CHECK-SAME: (ptr noalias [[A:%.*]], i32 [[SIZE:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8 -; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8 +; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 ; CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [1 x i64], align 8 ; CHECK-NEXT: [[HANDLE:%.*]] = alloca [[STRUCT___TGT_ASYNC_INFO:%.*]], align 8 ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) 
@rand() ; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS]] to double** -; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0 -; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** [[TMP1]], i8** [[TMP3]], i64* [[TMP5]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null, %struct.__tgt_async_info* [[HANDLE]]) +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_BASEPTRS]], align 8 +; CHECK-NEXT: store ptr [[A]], ptr [[DOTOFFLOAD_PTRS]], align 8 +; CHECK-NEXT: store i64 [[TMP0]], ptr [[DOTOFFLOAD_SIZES]], align 8 +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_issue(ptr @[[GLOB0]], i64 -1, i32 1, ptr [[DOTOFFLOAD_BASEPTRS]], ptr [[DOTOFFLOAD_PTRS]], ptr [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.5, ptr null, ptr null, ptr [[HANDLE]]) ; CHECK-NEXT: [[REM:%.*]] = urem i32 [[CALL]], [[SIZE]] -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* [[HANDLE]]) -; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, ptr [[HANDLE]]) +; CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB0]], i64 -1, i32 1, ptr nonnull [[DOTOFFLOAD_BASEPTRS]], ptr nonnull [[DOTOFFLOAD_PTRS]], ptr nonnull [[DOTOFFLOAD_SIZES]], ptr @.offload_maptypes.5, ptr null, ptr null) ; CHECK-NEXT: ret i32 [[REM]] ; @@ -489,8 +410,8 @@ define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) { entry: - %.offload_baseptrs = alloca [1 x i8*], align 8 - %.offload_ptrs = alloca [1 x i8*], align 8 + %.offload_baseptrs = alloca [1 x ptr], align 8 + %.offload_ptrs = alloca [1 x ptr], align 8 %.offload_sizes = alloca [1 x i64], align 8 ; FIXME: call to @__tgt_target_data_begin_issue_mapper(...) should be moved here. 
@@ -498,30 +419,25 @@ entry: %conv = zext i32 %size to i64 %0 = shl nuw nsw i64 %conv, 3 - %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs, i64 0, i64 0 - %2 = bitcast [1 x i8*]* %.offload_baseptrs to double** - store double* %a, double** %2, align 8 - %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0 - %4 = bitcast [1 x i8*]* %.offload_ptrs to double** - store double* %a, double** %4, align 8 - %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0 - store i64 %0, i64* %5, align 8 - call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null) + store ptr %a, ptr %.offload_baseptrs, align 8 + store ptr %a, ptr %.offload_ptrs, align 8 + store i64 %0, ptr %.offload_sizes, align 8 + call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.5, ptr null, ptr null) %rem = urem i32 %call, %size - call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null) + call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes.5, ptr null, ptr null) ret i32 %rem } -declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) -declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32) -declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) +declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32) +declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) declare dso_local i32 @rand(...) 
-; CHECK: declare void @__tgt_target_data_begin_mapper_issue(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**, %struct.__tgt_async_info*) -; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*) +; CHECK: declare void @__tgt_target_data_begin_mapper_issue(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr) +; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, ptr) !llvm.module.flags = !{!0} diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_cg_update.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_cg_update.ll index 17db5d0..f15cbd6 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion_cg_update.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion_cg_update.ll @@ -4,12 +4,11 @@ ; CHECK-NEXT: CS calls function 'dead_fork_call' ; CHECK-NEXT: CS calls function '__kmpc_fork_call' ; CHECK-NEXT: CS calls function 'live_fork_call' -; CHECK-NEXT: CS calls function '.omp_outlined..1' ; CHECK-NEXT: CS calls function 'd' ; ; CHECK: Call graph node for function: '.omp_outlined..0'<<{{.*}}>> #uses=0 ; -; CHECK: Call graph node for function: '.omp_outlined..1'<<{{.*}}>> #uses=3 +; CHECK: Call graph node for function: '.omp_outlined..1'<<{{.*}}>> #uses=2 ; CHECK: CS<{{.*}}> calls function 'd' ; ; CHECK: Call graph node for function: '__kmpc_fork_call'<<{{.*}}>> #uses=3 @@ -29,10 +28,10 @@ ; CHECK: CS calls function '.omp_outlined..1' -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 define dso_local void @dead_fork_call() { entry: @@ -43,7 +42,7 @@ if.then: ; preds = %entry if.else: ; preds = %entry call void @dead_fork_call2() - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined..0) br label %if.end if.end: ; preds = %if.else, %if.then @@ -52,33 +51,33 @@ if.end: ; preds = %if.else, %if.then define internal void @dead_fork_call2() { entry: - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined..1) ret void } -define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined..0(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 ret void } -declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
+declare !callback !2 void @__kmpc_fork_call(ptr, i32, ptr, ...) define dso_local void @live_fork_call() { entry: - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined..1) ret void } -define internal void @.omp_outlined..1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined..1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void (...) @d() ret void } -- 2.7.4
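A note for readers on what the conversion above actually changes. With typed
pointers, every pointer value carries a pointee type (i8*, double*,
i32 addrspace(4)*), so the IR needs a bitcast whenever a pointer is reused at
a different type, plus element-zero GEPs such as
getelementptr inbounds ([1 x i8*], [1 x i8*]* %p, i64 0, i64 0) just to name
the first slot of an array. With opaque pointers there is a single ptr type
(qualified only by address space), and the value type is spelled by the
instruction that uses the pointer, so those casts and GEPs fold away and the
remaining unnamed values are renumbered (%11 becomes %1, and so on). A
minimal illustrative pair, not taken from the tests above: @store_zero is an
invented name, and the two forms are alternative spellings under the old and
new pointer modes, not code that can coexist in one module.

  ; typed-pointer form (pre-LLVM-15 spelling); example only, not from the tests
  define void @store_zero(i8* %p) {
    %q = bitcast i8* %p to i32*   ; retype the buffer before storing an i32
    store i32 0, i32* %q
    ret void
  }

  ; equivalent opaque-pointer form
  define void @store_zero(ptr %p) {
    store i32 0, ptr %p           ; the store itself names the value type
    ret void
  }

That is why the hunks above consist almost entirely of deleted bitcast and
element-zero getelementptr instructions, mechanically retyped operands, and
the corresponding FileCheck updates.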