From c00ffbe02b222ec34a0181436c17025a2dfacedc Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 23 Dec 2022 10:03:38 +0100 Subject: [PATCH] [VectorCombine] Convert tests to opaque pointers (NFC) --- .../load-extract-insert-store-scalarization.ll | 74 +- .../AArch64/load-extractelement-scalarization.ll | 284 +++--- .../AMDGPU/as-transition-inseltpoison.ll | 15 +- .../VectorCombine/AMDGPU/as-transition.ll | 15 +- .../VectorCombine/Hexagon/load-inseltpoison.ll | 6 +- llvm/test/Transforms/VectorCombine/Hexagon/load.ll | 6 +- .../X86/insert-binop-with-constant-inseltpoison.ll | 18 +- .../X86/insert-binop-with-constant.ll | 18 +- .../VectorCombine/X86/load-inseltpoison.ll | 465 +++++----- llvm/test/Transforms/VectorCombine/X86/load.ll | 451 +++++----- .../X86/scalarize-cmp-inseltpoison.ll | 14 +- .../Transforms/VectorCombine/X86/scalarize-cmp.ll | 14 +- .../VectorCombine/X86/scalarize-vector-gep.ll | 948 ++++++++++----------- .../Transforms/VectorCombine/load-insert-store.ll | 410 +++++---- 14 files changed, 1325 insertions(+), 1413 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll index a2f6048..605c077 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll @@ -3,45 +3,45 @@ target triple = "arm64-apple-darwin" -define void @load_extract_insert_store_const_idx(<225 x double>* %A) { +define void @load_extract_insert_store_const_idx(ptr %A) { ; CHECK-LABEL: @load_extract_insert_store_const_idx( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 0 -; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 0 +; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 1 -; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 1 +; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 1 -; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 1 +; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8 ; CHECK-NEXT: ret void ; entry: - %lv = load <225 x double>, <225 x double>* %A, align 8 + %lv = load <225 x double>, ptr %A, align 8 %ext.0 = extractelement <225 x double> %lv, i64 0 %mul = fmul double 20.0, %ext.0 %ext.1 = extractelement <225 x double> %lv, i64 1 %sub = fsub double %ext.1, %mul %ins = insertelement <225 x double> %lv, double %sub, i64 1 - store <225 x double> %ins, <225 x double>* %A, align 8 + store <225 x double> %ins, ptr %A, align 8 ret void } -define void @load_extract_insert_store_var_idx_assume_valid(i64 %idx.1, i64 %idx.2, <225 x double>* %A) { +define void @load_extract_insert_store_var_idx_assume_valid(i64 %idx.1, i64 %idx.2, ptr %A) { ; CHECK-LABEL: 
@load_extract_insert_store_var_idx_assume_valid( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i64 [[IDX_1:%.*]], 225 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_1]]) ; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i64 [[IDX_2:%.*]], 225 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_2]]) -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_1]] -; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 [[IDX_1]] +; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[IDX_2]] -; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 [[IDX_2]] +; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]] -; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[IDX_1]] +; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -50,19 +50,19 @@ entry: %cmp.2 = icmp ult i64 %idx.2, 225 call void @llvm.assume(i1 %cmp.2) - %lv = load <225 x double>, <225 x double>* %A, align 8 + %lv = load <225 x double>, ptr %A, align 8 %ext.0 = extractelement <225 x double> %lv, i64 %idx.1 %mul = fmul double 20.0, %ext.0 %ext.1 = extractelement <225 x double> %lv, i64 %idx.2 %sub = fsub double %ext.1, %mul %ins = insertelement <225 x double> %lv, double %sub, i64 %idx.1 - store <225 x double> %ins, <225 x double>* %A, align 8 + store <225 x double> %ins, ptr %A, align 8 ret void } declare i1 @cond() -define void @load_extract_insert_store_var_idx_assume_valid_in_dominating_block(i64 %idx.1, i64 %idx.2, <225 x double>* %A, i1 %c.1) { +define void @load_extract_insert_store_var_idx_assume_valid_in_dominating_block(i64 %idx.1, i64 %idx.2, ptr %A, i1 %c.1) { ; CHECK-LABEL: @load_extract_insert_store_var_idx_assume_valid_in_dominating_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i64 [[IDX_1:%.*]], 225 @@ -71,14 +71,14 @@ define void @load_extract_insert_store_var_idx_assume_valid_in_dominating_block( ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_2]]) ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_1]] -; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 [[IDX_1]] +; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[IDX_2]] -; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 [[IDX_2]] +; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], 
[[MUL]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]] -; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[IDX_1]] +; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -92,13 +92,13 @@ entry: br i1 %c.1, label %loop, label %exit loop: - %lv = load <225 x double>, <225 x double>* %A, align 8 + %lv = load <225 x double>, ptr %A, align 8 %ext.0 = extractelement <225 x double> %lv, i64 %idx.1 %mul = fmul double 20.0, %ext.0 %ext.1 = extractelement <225 x double> %lv, i64 %idx.2 %sub = fsub double %ext.1, %mul %ins = insertelement <225 x double> %lv, double %sub, i64 %idx.1 - store <225 x double> %ins, <225 x double>* %A, align 8 + store <225 x double> %ins, ptr %A, align 8 %c.2 = call i1 @cond() br i1 %c.2, label %loop, label %exit @@ -106,7 +106,7 @@ exit: ret void } -define void @load_extract_insert_store_var_idx_assume_valid_in_non_dominating_block(i64 %idx.1, i64 %idx.2, <225 x double>* %A, i1 %c.1, i1 %c.2) { +define void @load_extract_insert_store_var_idx_assume_valid_in_non_dominating_block(i64 %idx.1, i64 %idx.2, ptr %A, i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @load_extract_insert_store_var_idx_assume_valid_in_non_dominating_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ASSUME_BLOCK:%.*]], label [[LOOP:%.*]] @@ -117,13 +117,13 @@ define void @load_extract_insert_store_var_idx_assume_valid_in_non_dominating_bl ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_2]]) ; CHECK-NEXT: br i1 [[C_2:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8 +; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1]] ; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]] ; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2]] ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]] ; CHECK-NEXT: [[INS:%.*]] = insertelement <225 x double> [[LV]], double [[SUB]], i64 [[IDX_1]] -; CHECK-NEXT: store <225 x double> [[INS]], <225 x double>* [[A]], align 8 +; CHECK-NEXT: store <225 x double> [[INS]], ptr [[A]], align 8 ; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_3]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -140,13 +140,13 @@ assume_block: br i1 %c.2, label %loop, label %exit loop: - %lv = load <225 x double>, <225 x double>* %A, align 8 + %lv = load <225 x double>, ptr %A, align 8 %ext.0 = extractelement <225 x double> %lv, i64 %idx.1 %mul = fmul double 20.0, %ext.0 %ext.1 = extractelement <225 x double> %lv, i64 %idx.2 %sub = fsub double %ext.1, %mul %ins = insertelement <225 x double> %lv, double %sub, i64 %idx.1 - store <225 x double> %ins, <225 x double>* %A, align 8 + store <225 x double> %ins, ptr %A, align 8 %c.3 = call i1 @cond() br i1 %c.3, label %loop, label %exit @@ -154,26 +154,26 @@ exit: ret void } -define void @load_extract_insert_store_var_idx_no_assume_valid(i64 %idx.1, i64 %idx.2, <225 x double>* %A) { +define void @load_extract_insert_store_var_idx_no_assume_valid(i64 %idx.1, i64 %idx.2, ptr %A) { ; CHECK-LABEL: @load_extract_insert_store_var_idx_no_assume_valid( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, <225 x 
double>* [[A:%.*]], align 8 +; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]] ; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2:%.*]] ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]] ; CHECK-NEXT: [[INS:%.*]] = insertelement <225 x double> [[LV]], double [[SUB]], i64 [[IDX_1]] -; CHECK-NEXT: store <225 x double> [[INS]], <225 x double>* [[A]], align 8 +; CHECK-NEXT: store <225 x double> [[INS]], ptr [[A]], align 8 ; CHECK-NEXT: ret void ; entry: - %lv = load <225 x double>, <225 x double>* %A, align 8 + %lv = load <225 x double>, ptr %A, align 8 %ext.0 = extractelement <225 x double> %lv, i64 %idx.1 %mul = fmul double 20.0, %ext.0 %ext.1 = extractelement <225 x double> %lv, i64 %idx.2 %sub = fsub double %ext.1, %mul %ins = insertelement <225 x double> %lv, double %sub, i64 %idx.1 - store <225 x double> %ins, <225 x double>* %A, align 8 + store <225 x double> %ins, ptr %A, align 8 ret void } diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll index d2be11a..142a7bd 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll @@ -2,103 +2,103 @@ ; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -S %s | FileCheck --check-prefixes=CHECK,LIMIT-DEFAULT %s ; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -vector-combine-max-scan-instrs=2 -S %s | FileCheck --check-prefixes=CHECK,LIMIT2 %s -define i32 @load_extract_idx_0(<4 x i32>* %x) { +define i32 @load_extract_idx_0(ptr %x) { ; CHECK-LABEL: @load_extract_idx_0( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 3 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 3 ret i32 %r } ; If the original load had a smaller alignment than the scalar type, the ; smaller alignment should be used. 
-define i32 @load_extract_idx_0_small_alignment(<4 x i32>* %x) { +define i32 @load_extract_idx_0_small_alignment(ptr %x) { ; CHECK-LABEL: @load_extract_idx_0_small_alignment( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 3 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 2 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x, align 2 + %lv = load <4 x i32>, ptr %x, align 2 %r = extractelement <4 x i32> %lv, i32 3 ret i32 %r } -define i32 @load_extract_idx_1(<4 x i32>* %x) { +define i32 @load_extract_idx_1(ptr %x) { ; CHECK-LABEL: @load_extract_idx_1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 1 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 1 ret i32 %r } -define i32 @load_extract_idx_2(<4 x i32>* %x) { +define i32 @load_extract_idx_2(ptr %x) { ; CHECK-LABEL: @load_extract_idx_2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } -define i32 @load_extract_idx_3(<4 x i32>* %x) { +define i32 @load_extract_idx_3(ptr %x) { ; CHECK-LABEL: @load_extract_idx_3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 3 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 3 ret i32 %r } ; Out-of-bounds index for extractelement, should not be converted to narrow ; load, because it would introduce a dereference of a poison pointer. 
-define i32 @load_extract_idx_4(<4 x i32>* %x) { +define i32 @load_extract_idx_4(ptr %x) { ; CHECK-LABEL: @load_extract_idx_4( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 4 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 4 ret i32 %r } -define i32 @load_extract_idx_var_i64(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64(ptr %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r } declare void @maythrow() readnone -define i32 @load_extract_idx_var_i64_known_valid_by_assume(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: call void @maythrow() -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX]] -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; entry: %cmp = icmp ult i64 %idx, 4 call void @llvm.assume(i1 %cmp) - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x call void @maythrow() %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r @@ -106,7 +106,7 @@ entry: declare i1 @cond() -define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(<4 x i32>* %x, i64 %idx, i1 %c.1) { +define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(ptr %x, i64 %idx, i1 %c.1) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 @@ -114,8 +114,8 @@ define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(< ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; CHECK: loop: ; CHECK-NEXT: call void @maythrow() -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX]] -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -128,7 +128,7 @@ entry: br i1 %c.1, label %loop, label %exit loop: - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x call void @maythrow() %r = extractelement <4 x i32> %lv, i64 %idx %c.2 = call i1 @cond() @@ -139,7 +139,7 @@ exit: ret i32 %p } -define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block(<4 x i32>* %x, i64 %idx, i1 %c.1, i1 %c.2) { +define i32 
@load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block(ptr %x, i64 %idx, i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ASSUME_CHECK:%.*]], label [[LOOP:%.*]] @@ -148,7 +148,7 @@ define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_blo ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 [[C_2:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] ; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() @@ -166,7 +166,7 @@ assume_check: br i1 %c.2, label %loop, label %exit loop: - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x call void @maythrow() %r = extractelement <4 x i32> %lv, i64 %idx %c.3 = call i1 @cond() @@ -177,11 +177,11 @@ exit: ret i32 0 } -define i32 @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(ptr %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume_after_load( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] @@ -189,203 +189,203 @@ define i32 @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(<4 x i ; entry: %cmp = icmp ult i64 %idx, 4 - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x call void @maythrow() call void @llvm.assume(i1 %cmp) %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r } -define i32 @load_extract_idx_var_i64_not_known_valid_by_assume(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64_not_known_valid_by_assume(ptr %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %cmp = icmp ult i64 %idx, 5 call void @llvm.assume(i1 %cmp) - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r } declare void @llvm.assume(i1) -define i32 @load_extract_idx_var_i64_known_valid_by_and(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3 -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = and i64 %idx, 3 - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load 
<4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } -define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef(<4 x i32>* %x, i64 noundef %idx) { +define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef(ptr %x, i64 noundef %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and_noundef( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = and i64 %idx, 3 - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } -define i32 @load_extract_idx_var_i64_not_known_valid_by_and(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_and( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4 -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = and i64 %idx, 4 - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } -define i32 @load_extract_idx_var_i64_known_valid_by_urem(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4 -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = urem i64 %idx, 4 - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } -define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef(<4 x i32>* %x, i64 noundef %idx) { +define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef(ptr %x, i64 noundef %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem_noundef( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = urem i64 %idx, 4 - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } -define i32 @load_extract_idx_var_i64_not_known_valid_by_urem(<4 x i32>* %x, i64 %idx) { +define i32 @load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) { ; CHECK-LABEL: 
@load_extract_idx_var_i64_not_known_valid_by_urem( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5 -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = urem i64 %idx, 5 - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } -define i32 @load_extract_idx_var_i32(<4 x i32>* %x, i32 %idx) { +define i32 @load_extract_idx_var_i32(ptr %x, i32 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i32( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 [[IDX:%.*]] ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 %idx ret i32 %r } declare void @clobber() -define i32 @load_extract_clobber_call_before(<4 x i32>* %x) { +define i32 @load_extract_clobber_call_before(ptr %x) { ; CHECK-LABEL: @load_extract_clobber_call_before( ; CHECK-NEXT: call void @clobber() -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 ; CHECK-NEXT: ret i32 [[R]] ; call void @clobber() - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } -define i32 @load_extract_clobber_call_between(<4 x i32>* %x) { +define i32 @load_extract_clobber_call_between(ptr %x) { ; CHECK-LABEL: @load_extract_clobber_call_between( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x call void @clobber() %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } -define i32 @load_extract_clobber_call_after(<4 x i32>* %x) { +define i32 @load_extract_clobber_call_after(ptr %x) { ; CHECK-LABEL: @load_extract_clobber_call_after( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 2 call void @clobber() ret i32 %r } -define i32 @load_extract_clobber_store_before(<4 x i32>* %x, i8* %y) { +define i32 @load_extract_clobber_store_before(ptr %x, ptr %y) { ; CHECK-LABEL: @load_extract_clobber_store_before( -; CHECK-NEXT: store i8 0, i8* [[Y:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 -; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 +; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 +; CHECK-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 ; CHECK-NEXT: ret i32 [[R]] ; - store i8 0, i8* %y - %lv = load <4 x i32>, <4 x i32>* %x + store i8 0, ptr %y + %lv = load <4 x i32>, ptr %x %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } -define i32 @load_extract_clobber_store_between(<4 x i32>* %x, i8* %y) { +define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) { ; CHECK-LABEL: @load_extract_clobber_store_between( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 -; CHECK-NEXT: store i8 0, i8* [[Y:%.*]], align 1 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 +; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 ; CHECK-NEXT: ret i32 [[R]] ; - %lv = load <4 x i32>, <4 x i32>* %x - store i8 0, i8* %y + %lv = load <4 x i32>, ptr %x + store i8 0, ptr %y %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } -define i32 @load_extract_clobber_store_between_limit(<4 x i32>* %x, i8* %y, <8 x i32> %z) { +define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) { ; CHECK-LABEL: @load_extract_clobber_store_between_limit( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 ; CHECK-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 ; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] @@ -395,12 +395,12 @@ define i32 @load_extract_clobber_store_between_limit(<4 x i32>* %x, i8* %y, <8 x ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] ; CHECK-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] -; CHECK-NEXT: store i8 0, i8* [[Y:%.*]], align 1 +; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] ; CHECK-NEXT: ret i32 [[ADD_4]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %z.0 = extractelement <8 x i32> %z, i32 0 %z.1 = extractelement <8 x i32> %z, i32 1 %add.0 = add i32 %z.0, %z.1 @@ -410,13 +410,13 @@ define i32 @load_extract_clobber_store_between_limit(<4 x i32>* %x, i8* %y, <8 x %add.2 = add i32 %add.1, %z.3 %z.4 = extractelement <8 x i32> %z, i32 4 %add.3 = add i32 %add.2, %z.4 - store i8 0, i8* %y + store i8 0, ptr %y %r = extractelement <4 x i32> %lv, i32 2 %add.4 = add i32 %add.3, %r ret i32 %add.4 } -define i32 @load_extract_clobber_store_after_limit(<4 x i32>* %x, i8* %y, <8 x i32> %z) { +define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) { ; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit( ; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 ; LIMIT-DEFAULT-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 @@ -427,14 +427,14 @@ define i32 @load_extract_clobber_store_after_limit(<4 x i32>* %x, i8* %y, <8 x i ; LIMIT-DEFAULT-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] ; LIMIT-DEFAULT-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 ; LIMIT-DEFAULT-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] -; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 -; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 -; LIMIT-DEFAULT-NEXT: store i8 0, 
i8* [[Y:%.*]], align 1 +; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 +; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 +; LIMIT-DEFAULT-NEXT: store i8 0, ptr [[Y:%.*]], align 1 ; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] ; LIMIT-DEFAULT-NEXT: ret i32 [[ADD_4]] ; ; LIMIT2-LABEL: @load_extract_clobber_store_after_limit( -; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 ; LIMIT2-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 ; LIMIT2-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] @@ -445,11 +445,11 @@ define i32 @load_extract_clobber_store_after_limit(<4 x i32>* %x, i8* %y, <8 x i ; LIMIT2-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 ; LIMIT2-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] ; LIMIT2-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 -; LIMIT2-NEXT: store i8 0, i8* [[Y:%.*]], align 1 +; LIMIT2-NEXT: store i8 0, ptr [[Y:%.*]], align 1 ; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] ; LIMIT2-NEXT: ret i32 [[ADD_4]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %z.0 = extractelement <8 x i32> %z, i32 0 %z.1 = extractelement <8 x i32> %z, i32 1 %add.0 = add i32 %z.0, %z.1 @@ -460,16 +460,16 @@ define i32 @load_extract_clobber_store_after_limit(<4 x i32>* %x, i8* %y, <8 x i %z.4 = extractelement <8 x i32> %z, i32 4 %add.3 = add i32 %add.2, %z.4 %r = extractelement <4 x i32> %lv, i32 2 - store i8 0, i8* %y + store i8 0, ptr %y %add.4 = add i32 %add.3, %r ret i32 %add.4 } declare void @use.v4i32(<4 x i32>) -define i32 @load_extract_idx_different_bbs(<4 x i32>* %x, i1 %c) { +define i32 @load_extract_idx_different_bbs(ptr %x, i1 %c) { ; CHECK-LABEL: @load_extract_idx_different_bbs( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 1 @@ -478,7 +478,7 @@ define i32 @load_extract_idx_different_bbs(<4 x i32>* %x, i1 %c) { ; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[LV]]) ; CHECK-NEXT: ret i32 20 ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x br i1 %c, label %then, label %else then: @@ -490,27 +490,27 @@ else: ret i32 20 } -define i31 @load_with_non_power_of_2_element_type(<4 x i31>* %x) { +define i31 @load_with_non_power_of_2_element_type(ptr %x) { ; CHECK-LABEL: @load_with_non_power_of_2_element_type( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i31>, <4 x i31>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i31>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i31> [[LV]], i32 1 ; CHECK-NEXT: ret i31 [[R]] ; - %lv = load <4 x i31>, <4 x i31>* %x + %lv = load <4 x i31>, ptr %x %r = extractelement <4 x i31> %lv, i32 1 ret i31 %r } ; Scalarizing the load for multiple constant indices may not be profitable. 
-define i32 @load_multiple_extracts_with_constant_idx(<4 x i32>* %x) {
+define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx(
-; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16
+; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> 
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]]
; CHECK-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: ret i32 [[RES]]
;
- %lv = load <4 x i32>, <4 x i32>* %x
+ %lv = load <4 x i32>, ptr %x
%e.0 = extractelement <4 x i32> %lv, i32 0
%e.1 = extractelement <4 x i32> %lv, i32 1
%res = add i32 %e.0, %e.1
@@ -519,16 +519,16 @@ define i32 @load_multiple_extracts_with_constant_idx(<4 x i32>* %x) {

; Scalarizing the load for multiple extracts is profitable in this case,
; because the large vector requires 2 vector registers.
-define i32 @load_multiple_extracts_with_constant_idx_profitable(<8 x i32>* %x) {
+define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) {
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i32>, <8 x i32>* [[X:%.*]], i32 0, i32 0
-; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP1]], align 16
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i32>, <8 x i32>* [[X]], i32 0, i32 6
-; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP2]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i32>, ptr [[X:%.*]], i32 0, i32 0
+; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i32>, ptr [[X]], i32 0, i32 6
+; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 8
; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT: ret i32 [[RES]]
;
- %lv = load <8 x i32>, <8 x i32>* %x, align 16
+ %lv = load <8 x i32>, ptr %x, align 16
%e.0 = extractelement <8 x i32> %lv, i32 0
%e.1 = extractelement <8 x i32> %lv, i32 6
%res = add i32 %e.0, %e.1
@@ -536,24 +536,24 @@ define i32 @load_multiple_extracts_with_constant_idx_profitable(<8 x i32>* %x) {
}

; Scalarizing may or may not be profitable, depending on the target. 
-define i32 @load_multiple_2_with_variable_indices(<4 x i32>* %x, i64 %idx.0, i64 %idx.1) { +define i32 @load_multiple_2_with_variable_indices(ptr %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_2_with_variable_indices( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } -define i32 @load_4_extracts_with_variable_indices_short_vector(<4 x i32>* %x, i64 %idx.0, i64 %idx.1, i64 %idx.2, i64 %idx.3) { +define i32 @load_4_extracts_with_variable_indices_short_vector(ptr %x, i64 %idx.0, i64 %idx.1, i64 %idx.2, i64 %idx.3) { ; CHECK-LABEL: @load_4_extracts_with_variable_indices_short_vector( -; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[E_2:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_2:%.*]] @@ -563,7 +563,7 @@ define i32 @load_4_extracts_with_variable_indices_short_vector(<4 x i32>* %x, i6 ; CHECK-NEXT: [[RES_2:%.*]] = add i32 [[RES_1]], [[E_3]] ; CHECK-NEXT: ret i32 [[RES_2]] ; - %lv = load <4 x i32>, <4 x i32>* %x + %lv = load <4 x i32>, ptr %x %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 %e.2 = extractelement <4 x i32> %lv, i64 %idx.2 @@ -574,11 +574,11 @@ define i32 @load_4_extracts_with_variable_indices_short_vector(<4 x i32>* %x, i6 ret i32 %res.2 } -define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { +define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid(ptr %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid( ; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) -; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 +; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] @@ -587,23 +587,23 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first %cmp.idx.0 = icmp ult i64 %idx.0, 16 call void @llvm.assume(i1 %cmp.idx.0) - %lv = load <16 x i32>, <16 x i32>* %x + %lv = load <16 x i32>, ptr %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } -define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { +define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid(ptr %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: 
@load_multiple_extracts_with_variable_indices_large_vector_only_all_valid( ; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) ; CHECK-NEXT: [[CMP_IDX_1:%.*]] = icmp ult i64 [[IDX_1:%.*]], 16 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_1]]) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[IDX_0]] -; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1]] -; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0]] +; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1]] +; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; @@ -612,17 +612,17 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_all_v %cmp.idx.1 = icmp ult i64 %idx.1, 16 call void @llvm.assume(i1 %cmp.idx.1) - %lv = load <16 x i32>, <16 x i32>* %x + %lv = load <16 x i32>, ptr %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } -define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { +define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and( ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 -; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 +; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] @@ -630,18 +630,18 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first ; %idx.0.clamped = and i64 %idx.0, 15 - %lv = load <16 x i32>, <16 x i32>* %x + %lv = load <16 x i32>, ptr %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } -define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { +define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and( ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 ; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15 -; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 +; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1_CLAMPED]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] @@ -650,18 +650,18 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_ 
%idx.0.clamped = and i64 %idx.0, 15 %idx.1.clamped = and i64 %idx.1, 15 - %lv = load <16 x i32>, <16 x i32>* %x + %lv = load <16 x i32>, ptr %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped %res = add i32 %e.0, %e.1 ret i32 %res } -define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(<16 x i32>* %x, i64 %idx.0, i64 noundef %idx.1) { +define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(ptr %x, i64 %idx.0, i64 noundef %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef( ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 ; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15 -; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 +; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1_CLAMPED]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] @@ -670,7 +670,7 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_ %idx.0.clamped = and i64 %idx.0, 15 %idx.1.clamped = and i64 %idx.1, 15 - %lv = load <16 x i32>, <16 x i32>* %x + %lv = load <16 x i32>, ptr %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped %res = add i32 %e.0, %e.1 @@ -678,7 +678,7 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_ } ; Test case from PR51992. -define i8 @load_extract_safe_due_to_branch_on_poison(<8 x i8> %in, <16 x i8>* %src) { +define i8 @load_extract_safe_due_to_branch_on_poison(<8 x i8> %in, ptr %src) { ; CHECK-LABEL: @load_extract_safe_due_to_branch_on_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXT_IDX:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 0 @@ -696,7 +696,7 @@ entry: br i1 %cmp, label %then, label %exit then: - %load = load <16 x i8>, <16 x i8>* %src, align 16 + %load = load <16 x i8>, ptr %src, align 16 %and = and i32 %ext.idx.i32, 15 %ext = extractelement <16 x i8> %load, i32 %and br label %exit diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll index b493c2a..9fe30ae 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll @@ -7,23 +7,22 @@ target triple = "amdgcn-amd-amdhsa" %struct.hoge = type { float } -define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture nonnull %resultptr) local_unnamed_addr #0 { +define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %resultptr) local_unnamed_addr #0 { ; CHECK-LABEL: @load_from_other_as( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to <1 x float>* -; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> -; CHECK-NEXT: store <4 x float> [[E]], <4 x float>* 
[[RESULTPTR:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16 ; CHECK-NEXT: ret void ; bb: %a = alloca %struct.hoge, align 4, addrspace(5) - %b = addrspacecast %struct.hoge addrspace(5)* %a to %struct.hoge* - %c = getelementptr inbounds %struct.hoge, %struct.hoge* %b, i64 0, i32 0 - %d = load float, float* %c, align 4 + %b = addrspacecast ptr addrspace(5) %a to ptr + %d = load float, ptr %b, align 4 %e = insertelement <4 x float> poison, float %d, i32 0 - store <4 x float> %e, <4 x float>* %resultptr, align 16 + store <4 x float> %e, ptr %resultptr, align 16 ret void } diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll index 8e36856..ecf85d1 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll @@ -7,23 +7,22 @@ target triple = "amdgcn-amd-amdhsa" %struct.hoge = type { float } -define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture nonnull %resultptr) local_unnamed_addr #0 { +define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %resultptr) local_unnamed_addr #0 { ; CHECK-LABEL: @load_from_other_as( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to <1 x float>* -; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> -; CHECK-NEXT: store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16 ; CHECK-NEXT: ret void ; bb: %a = alloca %struct.hoge, align 4, addrspace(5) - %b = addrspacecast %struct.hoge addrspace(5)* %a to %struct.hoge* - %c = getelementptr inbounds %struct.hoge, %struct.hoge* %b, i64 0, i32 0 - %d = load float, float* %c, align 4 + %b = addrspacecast ptr addrspace(5) %a to ptr + %d = load float, ptr %b, align 4 %e = insertelement <4 x float> undef, float %d, i32 0 - store <4 x float> %e, <4 x float>* %resultptr, align 16 + store <4 x float> %e, ptr %resultptr, align 16 ret void } diff --git a/llvm/test/Transforms/VectorCombine/Hexagon/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/Hexagon/load-inseltpoison.ll index fdfdd6a..8f9b892 100644 --- a/llvm/test/Transforms/VectorCombine/Hexagon/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/Hexagon/load-inseltpoison.ll @@ -5,13 +5,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; This would crash because TTI returns "0" for vector length. 
-define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) { +define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v4f32( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> poison, float [[S]], i32 0 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <4 x float> poison, float %s, i32 0 ret <4 x float> %r } diff --git a/llvm/test/Transforms/VectorCombine/Hexagon/load.ll b/llvm/test/Transforms/VectorCombine/Hexagon/load.ll index 890033d..900c2f4 100644 --- a/llvm/test/Transforms/VectorCombine/Hexagon/load.ll +++ b/llvm/test/Transforms/VectorCombine/Hexagon/load.ll @@ -5,13 +5,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; This would crash because TTI returns "0" for vector length. -define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) { +define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v4f32( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <4 x float> undef, float %s, i32 0 ret <4 x float> %r } diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll index 2ae446d8..e5be89d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll @@ -24,14 +24,14 @@ define <2 x i64> @add_constant_not_undef_lane(i64 %x) { ret <2 x i64> %bo } -define <2 x i64> @add_constant_load(i64* %p) { +define <2 x i64> @add_constant_load(ptr %p) { ; CHECK-LABEL: @add_constant_load( -; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0 ; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], ; CHECK-NEXT: ret <2 x i64> [[BO]] ; - %ld = load i64, i64* %p + %ld = load i64, ptr %p %ins = insertelement <2 x i64> poison, i64 %ld, i32 0 %bo = add <2 x i64> %ins, ret <2 x i64> %bo @@ -150,14 +150,14 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { ret <2 x i64> %bo } -define <2 x i64> @shl_constant_op0_load(i64* %p) { +define <2 x i64> @shl_constant_op0_load(ptr %p) { ; CHECK-LABEL: @shl_constant_op0_load( -; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 1 ; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] ; CHECK-NEXT: ret <2 x i64> [[BO]] ; - %ld = load i64, i64* %p + %ld = load i64, ptr %p %ins = insertelement <2 x i64> poison, i64 %ld, i32 1 %bo = shl <2 x i64> , %ins ret <2 x i64> %bo @@ -201,14 +201,14 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) { ret <2 x i64> %bo } -define <2 x i64> @shl_constant_op1_load(i64* %p) { +define <2 x i64> @shl_constant_op1_load(ptr %p) { ; CHECK-LABEL: 
@shl_constant_op1_load( -; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0 ; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], ; CHECK-NEXT: ret <2 x i64> [[BO]] ; - %ld = load i64, i64* %p + %ld = load i64, ptr %p %ins = insertelement <2 x i64> poison, i64 %ld, i32 0 %bo = shl nuw <2 x i64> %ins, ret <2 x i64> %bo diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index 8600315..2a0db43 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -24,14 +24,14 @@ define <2 x i64> @add_constant_not_undef_lane(i64 %x) { ret <2 x i64> %bo } -define <2 x i64> @add_constant_load(i64* %p) { +define <2 x i64> @add_constant_load(ptr %p) { ; CHECK-LABEL: @add_constant_load( -; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0 ; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], ; CHECK-NEXT: ret <2 x i64> [[BO]] ; - %ld = load i64, i64* %p + %ld = load i64, ptr %p %ins = insertelement <2 x i64> undef, i64 %ld, i32 0 %bo = add <2 x i64> %ins, ret <2 x i64> %bo @@ -150,14 +150,14 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { ret <2 x i64> %bo } -define <2 x i64> @shl_constant_op0_load(i64* %p) { +define <2 x i64> @shl_constant_op0_load(ptr %p) { ; CHECK-LABEL: @shl_constant_op0_load( -; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 1 ; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] ; CHECK-NEXT: ret <2 x i64> [[BO]] ; - %ld = load i64, i64* %p + %ld = load i64, ptr %p %ins = insertelement <2 x i64> undef, i64 %ld, i32 1 %bo = shl <2 x i64> , %ins ret <2 x i64> %bo @@ -201,14 +201,14 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) { ret <2 x i64> %bo } -define <2 x i64> @shl_constant_op1_load(i64* %p) { +define <2 x i64> @shl_constant_op1_load(ptr %p) { ; CHECK-LABEL: @shl_constant_op1_load( -; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0 ; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], ; CHECK-NEXT: ret <2 x i64> [[BO]] ; - %ld = load i64, i64* %p + %ld = load i64, ptr %p %ins = insertelement <2 x i64> undef, i64 %ld, i32 0 %bo = shl nuw <2 x i64> %ins, ret <2 x i64> %bo diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index e4ea44a..ffb5bf7 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -4,324 +4,300 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define float @matching_fp_scalar(float* align 16 dereferenceable(16) %p) { +define float @matching_fp_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_scalar( -; CHECK-NEXT: [[R:%.*]] = load float, float* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; 
- %r = load float, float* %p, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_scalar_volatile(float* align 16 dereferenceable(16) %p) { +define float @matching_fp_scalar_volatile(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_scalar_volatile( -; CHECK-NEXT: [[R:%.*]] = load volatile float, float* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load volatile float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %r = load volatile float, float* %p, align 16 + %r = load volatile float, ptr %p, align 16 ret float %r } -define double @larger_fp_scalar(float* align 16 dereferenceable(16) %p) { +define double @larger_fp_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @larger_fp_scalar( -; CHECK-NEXT: [[BC:%.*]] = bitcast float* [[P:%.*]] to double* -; CHECK-NEXT: [[R:%.*]] = load double, double* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load double, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret double [[R]] ; - %bc = bitcast float* %p to double* - %r = load double, double* %bc, align 16 + %r = load double, ptr %p, align 16 ret double %r } -define float @smaller_fp_scalar(double* align 16 dereferenceable(16) %p) { +define float @smaller_fp_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @smaller_fp_scalar( -; CHECK-NEXT: [[BC:%.*]] = bitcast double* [[P:%.*]] to float* -; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %bc = bitcast double* %p to float* - %r = load float, float* %bc, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_vector(<4 x float>* align 16 dereferenceable(16) %p) { +define float @matching_fp_vector(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_vector( -; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to float* -; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %bc = bitcast <4 x float>* %p to float* - %r = load float, float* %bc, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_vector_gep00(<4 x float>* align 16 dereferenceable(16) %p) { +define float @matching_fp_vector_gep00(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_vector_gep00( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 0 - %r = load float, float* %gep, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_vector_gep01(<4 x float>* align 16 dereferenceable(20) %p) { +define float @matching_fp_vector_gep01(ptr align 16 dereferenceable(20) %p) { ; CHECK-LABEL: @matching_fp_vector_gep01( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 1 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 1 - %r = load float, float* %gep, align 4 + %gep = getelementptr inbounds 
<4 x float>, ptr %p, i64 0, i64 1 + %r = load float, ptr %gep, align 4 ret float %r } -define float @matching_fp_vector_gep01_deref(<4 x float>* align 16 dereferenceable(19) %p) { +define float @matching_fp_vector_gep01_deref(ptr align 16 dereferenceable(19) %p) { ; CHECK-LABEL: @matching_fp_vector_gep01_deref( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 1 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 1 - %r = load float, float* %gep, align 4 + %gep = getelementptr inbounds <4 x float>, ptr %p, i64 0, i64 1 + %r = load float, ptr %gep, align 4 ret float %r } -define float @matching_fp_vector_gep10(<4 x float>* align 16 dereferenceable(32) %p) { +define float @matching_fp_vector_gep10(ptr align 16 dereferenceable(32) %p) { ; CHECK-LABEL: @matching_fp_vector_gep10( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 1, i64 0 - %r = load float, float* %gep, align 16 + %gep = getelementptr inbounds <4 x float>, ptr %p, i64 1, i64 0 + %r = load float, ptr %gep, align 16 ret float %r } -define float @matching_fp_vector_gep10_deref(<4 x float>* align 16 dereferenceable(31) %p) { +define float @matching_fp_vector_gep10_deref(ptr align 16 dereferenceable(31) %p) { ; CHECK-LABEL: @matching_fp_vector_gep10_deref( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 1, i64 0 - %r = load float, float* %gep, align 16 + %gep = getelementptr inbounds <4 x float>, ptr %p, i64 1, i64 0 + %r = load float, ptr %gep, align 16 ret float %r } -define float @nonmatching_int_vector(<2 x i64>* align 16 dereferenceable(16) %p) { +define float @nonmatching_int_vector(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @nonmatching_int_vector( -; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64>* [[P:%.*]] to float* -; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %bc = bitcast <2 x i64>* %p to float* - %r = load float, float* %bc, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define double @less_aligned(double* align 4 dereferenceable(16) %p) { +define double @less_aligned(ptr align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @less_aligned( -; CHECK-NEXT: [[R:%.*]] = load double, double* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = load double, ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret double [[R]] ; - %r = load double, double* %p, align 4 + %r = load double, ptr %p, align 4 ret double %r } -define float @matching_fp_scalar_small_deref(float* align 16 
dereferenceable(15) %p) { +define float @matching_fp_scalar_small_deref(ptr align 16 dereferenceable(15) %p) { ; CHECK-LABEL: @matching_fp_scalar_small_deref( -; CHECK-NEXT: [[R:%.*]] = load float, float* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %r = load float, float* %p, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define i64 @larger_int_scalar(<4 x float>* align 16 dereferenceable(16) %p) { +define i64 @larger_int_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @larger_int_scalar( -; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i64* -; CHECK-NEXT: [[R:%.*]] = load i64, i64* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load i64, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret i64 [[R]] ; - %bc = bitcast <4 x float>* %p to i64* - %r = load i64, i64* %bc, align 16 + %r = load i64, ptr %p, align 16 ret i64 %r } -define i8 @smaller_int_scalar(<4 x float>* align 16 dereferenceable(16) %p) { +define i8 @smaller_int_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @smaller_int_scalar( -; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i8* -; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load i8, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret i8 [[R]] ; - %bc = bitcast <4 x float>* %p to i8* - %r = load i8, i8* %bc, align 16 + %r = load i8, ptr %p, align 16 ret i8 %r } -define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable(32) %p) { +define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @larger_fp_scalar_256bit_vec( -; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x float>* [[P:%.*]] to double* -; CHECK-NEXT: [[R:%.*]] = load double, double* [[BC]], align 32 +; CHECK-NEXT: [[R:%.*]] = load double, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret double [[R]] ; - %bc = bitcast <8 x float>* %p to double* - %r = load double, double* %bc, align 32 + %r = load double, ptr %p, align 32 ret double %r } -define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) nofree nosync { +define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <4 x float> poison, float %s, i32 0 ret <4 x float> %r } -define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) nofree nosync { +define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_f32_insert_v4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %b = bitcast <4 x float>* %p to float* - %s = load float, float* %b, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <4 x float> 
poison, float %s, i32 0 ret <4 x float> %r } ; Element type does not change cost. -define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) nofree nosync { +define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_i32_insert_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; - %s = load i32, i32* %p, align 4 + %s = load i32, ptr %p, align 4 %r = insertelement <4 x i32> poison, i32 %s, i32 0 ret <4 x i32> %r } ; Pointer type does not change cost. -define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) nofree nosync { +define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_i32_insert_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; - %b = bitcast <16 x i8>* %p to i32* - %s = load i32, i32* %b, align 4 + %s = load i32, ptr %p, align 4 %r = insertelement <4 x i32> poison, i32 %s, i32 0 ret <4 x i32> %r } ; This is canonical form for vector element access. -define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) nofree nosync { +define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 0 - %s = load float, float* %gep, align 16 + %s = load float, ptr %p, align 16 %r = insertelement <4 x float> poison, float %s, i64 0 ret <4 x float> %r } ; Should work with addrspace as well. 
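; The rewrite below only retypes the load, so a non-default address space
; just rides along on the same pointer. Informal sketch (not a FileCheck
; line; mask constants written out for illustration):
;   %s = load float, ptr addrspace(44) %p
; becomes
;   %v = load <4 x float>, ptr addrspace(44) %p
;   %r = shufflevector <4 x float> %v, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>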
-define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) nofree nosync { +define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(44)* %p, i64 0, i64 0 - %s = load float, float addrspace(44)* %gep, align 16 + %s = load float, ptr addrspace(44) %p, align 16 %r = insertelement <4 x float> poison, float %s, i64 0 ret <4 x float> %r } ; Should work with addrspace even when peeking past unsafe loads through geps -define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(i32* align 16 dereferenceable(16) %v3) { +define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(ptr align 16 dereferenceable(16) %v3) { ; CHECK-LABEL: @unsafe_load_i32_insert_v4i32_addrspace( -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[V3:%.*]] to <4 x i32> addrspace(42)* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(42)* [[TMP1]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[V3:%.*]] to ptr addrspace(42) +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(42) [[TMP1]], align 16 ; CHECK-NEXT: [[INSELT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[INSELT]] ; - %t0 = getelementptr inbounds i32, i32* %v3, i32 1 - %t1 = addrspacecast i32* %t0 to i32 addrspace(42)* - %t2 = getelementptr inbounds i32, i32 addrspace(42)* %t1, i64 1 - %val = load i32, i32 addrspace(42)* %t2, align 4 + %t0 = getelementptr inbounds i32, ptr %v3, i32 1 + %t1 = addrspacecast ptr %t0 to ptr addrspace(42) + %t2 = getelementptr inbounds i32, ptr addrspace(42) %t1, i64 1 + %val = load i32, ptr addrspace(42) %t2, align 4 %inselt = insertelement <4 x i32> poison, i32 %val, i32 0 ret <4 x i32> %inselt } ; If there are enough dereferenceable bytes, we can offset the vector load. -define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync { +define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync { ; CHECK-LABEL: @gep01_load_i16_insert_v8i16( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2 +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 - %s = load i16, i16* %gep, align 2 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 + %s = load i16, ptr %gep, align 2 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } ; Can't safely load the offset vector, but can load+shuffle if it is profitable. 
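; Byte math for @gep01_load_i16_insert_v8i16_deref below: the gep is 2 bytes
; past %p, so widening at the gep would touch bytes 2..17 and require
; dereferenceable(18). With only 17 bytes known, the fallback is to reload
; the whole <8 x i16> at the base and shuffle lane 1 into lane 0, which the
; cost model accepts on AVX2 but not on SSE2 (see the split check lines).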
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) nofree nosync { +define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(17) %p) nofree nosync { ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref( -; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1 -; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2 +; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1 +; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2 ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; SSE2-NEXT: ret <8 x i16> [[R]] ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref( -; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 16 +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 - %s = load i16, i16* %gep, align 2 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 + %s = load i16, ptr %gep, align 2 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } ; Verify that alignment of the new load is not over-specified. -define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) nofree nosync { +define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 dereferenceable(16) %p) nofree nosync { ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign( -; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1 -; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 8 +; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1 +; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8 ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; SSE2-NEXT: ret <8 x i16> [[R]] ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign( -; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 2 +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2 ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 - %s = load i16, i16* %gep, align 8 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 + %s = load i16, ptr %gep, align 8 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } @@ -330,31 +306,27 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 ; must be a multiple of element size. ; TODO: Could bitcast around this limitation. 
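; For @gep01_bitcast_load_i32_insert_v4i32 below: the load sits 1 byte past
; %p, and 1 is not a multiple of the 4-byte element size, so the scalar i32
; does not line up with any lane of a <4 x i32> load and is left alone.
; Contrast @gep012_bitcast_load_i32_insert_v4i32, where offset 12 is lane 3
; and the load+shuffle rewrite fires.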
-define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) nofree nosync { +define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 1 -; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32* -; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0 ; CHECK-NEXT: ret <4 x i32> [[R]] ; - %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 1 - %b = bitcast i8* %gep to i32* - %s = load i32, i32* %b, align 1 + %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 1 + %s = load i32, ptr %gep, align 1 %r = insertelement <4 x i32> poison, i32 %s, i64 0 ret <4 x i32> %r } -define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync { +define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync { ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; - %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12 - %b = bitcast i8* %gep to i32* - %s = load i32, i32* %b, align 1 + %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12 + %s = load i32, ptr %gep, align 1 %r = insertelement <4 x i32> poison, i32 %s, i64 0 ret <4 x i32> %r } @@ -363,33 +335,30 @@ define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe ; must be a multiple of element size and the offset must be low enough to fit in the vector ; (bitcasting would not help this case). -define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync { +define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync { ; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13 -; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32* -; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 13 +; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0 ; CHECK-NEXT: ret <4 x i32> [[R]] ; - %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 13 - %b = bitcast i8* %gep to i32* - %s = load i32, i32* %b, align 1 + %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 13 + %s = load i32, ptr %gep, align 1 %r = insertelement <4 x i32> poison, i32 %s, i64 0 ret <4 x i32> %r } ; If there are enough dereferenceable bytes, we can offset the vector load. 
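; Byte math for @gep10_load_i16_insert_v8i16 below: the gep is one whole
; vector (16 bytes) past %p, so the widened <8 x i16> load covers bytes
; 16..31 and dereferenceable(32) is exactly enough; the combine loads the
; vector straight from the gep and shuffles lane 0.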
-define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) nofree nosync { +define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync { ; CHECK-LABEL: @gep10_load_i16_insert_v8i16( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 - %s = load i16, i16* %gep, align 16 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 + %s = load i16, ptr %gep, align 16 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } @@ -397,43 +366,43 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl ; Negative test - disable under asan because widened load can cause spurious ; use-after-poison issues when __asan_poison_memory_region is used. -define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address { +define <8 x i16> @gep10_load_i16_insert_v8i16_asan(ptr align 16 dereferenceable(32) %p) sanitize_address { ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 - %s = load i16, i16* %gep, align 16 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 + %s = load i16, ptr %gep, align 16 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } ; hwasan and memtag should be similarly suppressed. 
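; (Same concern as the asan case above: the widened load reads bytes the
; original program never touches, which hwasan/memtag could see as a tag
; mismatch, so the transform conservatively bails under these attributes.)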
-define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress { +define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(ptr align 16 dereferenceable(32) %p) sanitize_hwaddress { ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_hwasan( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 - %s = load i16, i16* %gep, align 16 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 + %s = load i16, ptr %gep, align 16 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } -define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag { +define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(ptr align 16 dereferenceable(32) %p) sanitize_memtag { ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_memtag( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 - %s = load i16, i16* %gep, align 16 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 + %s = load i16, ptr %gep, align 16 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } @@ -441,43 +410,43 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 derefer ; Negative test - disable under tsan because widened load may overlap bytes ; being concurrently modified. tsan does not know that some bytes are undef. -define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread { +define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(ptr align 16 dereferenceable(32) %p) sanitize_thread { ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 - %s = load i16, i16* %gep, align 16 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 + %s = load i16, ptr %gep, align 16 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } ; Negative test - can't safely load the offset vector, but could load+shuffle. 
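; Byte math for @gep10_load_i16_insert_v8i16_deref below: the widened load
; at the gep would cover bytes 16..31, but only 31 bytes are known
; dereferenceable, one short of the 32 required, so the scalar load stays.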
-define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) nofree nosync { +define <8 x i16> @gep10_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(31) %p) nofree nosync { ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 - %s = load i16, i16* %gep, align 16 + %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 + %s = load i16, ptr %gep, align 16 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } ; Negative test - do not alter volatile. -define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) nofree nosync { +define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32_volatile( -; CHECK-NEXT: [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[S:%.*]] = load volatile float, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> poison, float [[S]], i32 0 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %s = load volatile float, float* %p, align 4 + %s = load volatile float, ptr %p, align 4 %r = insertelement <4 x float> poison, float %s, i32 0 ret <4 x float> %r } @@ -485,98 +454,93 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab ; Pointer is not as aligned as load, but that's ok. ; The new load uses the larger alignment value. -define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) nofree nosync { +define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32_align( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <4 x float> poison, float %s, i32 0 ret <4 x float> %r } ; Negative test - not enough bytes. 
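; For @load_f32_insert_v4f32_deref below: a widened <4 x float> load needs
; 16 dereferenceable bytes and only 15 are guaranteed, so no transform.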
-define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) nofree nosync { +define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32_deref( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> poison, float [[S]], i32 0 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <4 x float> poison, float %s, i32 0 ret <4 x float> %r } -define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) nofree nosync { +define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_i32_insert_v8i32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; - %s = load i32, i32* %p, align 4 + %s = load i32, ptr %p, align 4 %r = insertelement <8 x i32> poison, i32 %s, i32 0 ret <8 x i32> %r } -define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) nofree nosync { +define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_i32_insert_v8i32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; - %b = bitcast <4 x i32>* %p to i32* - %s = load i32, i32* %b, align 4 + %s = load i32, ptr %p, align 4 %r = insertelement <8 x i32> poison, i32 %s, i32 0 ret <8 x i32> %r } -define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) nofree nosync { +define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v16f32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <16 x float> poison, float %s, i32 0 ret <16 x float> %r } -define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) nofree nosync { +define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v2f32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = 
shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <2 x float> poison, float %s, i32 0 ret <2 x float> %r } ; Negative test - suppress load widening for asan/hwasan/memtag/tsan. -define <2 x float> @load_f32_insert_v2f32_asan(float* align 16 dereferenceable(16) %p) sanitize_address { +define <2 x float> @load_f32_insert_v2f32_asan(ptr align 16 dereferenceable(16) %p) sanitize_address { ; CHECK-LABEL: @load_f32_insert_v2f32_asan( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> poison, float [[S]], i32 0 ; CHECK-NEXT: ret <2 x float> [[R]] ; - %s = load float, float* %p, align 4 + %s = load float, ptr %p, align 4 %r = insertelement <2 x float> poison, float %s, i32 0 ret <2 x float> %r } -declare float* @getscaleptr() -define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr, <2 x float>* nocapture nonnull readonly %opptr) nofree nosync { +declare ptr @getscaleptr() +define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr nocapture nonnull readonly %opptr) nofree nosync { ; CHECK-LABEL: @PR47558_multiple_use_load( -; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) float* @getscaleptr() -; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, <2 x float>* [[OPPTR:%.*]], align 4 -; CHECK-NEXT: [[SCALE:%.*]] = load float, float* [[SCALEPTR]], align 16 +; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr() +; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4 +; CHECK-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16 ; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x float> poison, float [[SCALE]], i32 0 ; CHECK-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1 ; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]] @@ -584,12 +548,12 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr ; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0 ; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1 ; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1 -; CHECK-NEXT: store <2 x float> [[RESULT1]], <2 x float>* [[RESULTPTR:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8 ; CHECK-NEXT: ret void ; - %scaleptr = tail call nonnull align 16 dereferenceable(64) float* @getscaleptr() - %op = load <2 x float>, <2 x float>* %opptr, align 4 - %scale = load float, float* %scaleptr, align 16 + %scaleptr = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr() + %op = load <2 x float>, ptr %opptr, align 4 + %scale = load float, ptr %scaleptr, align 16 %t1 = insertelement <2 x float> poison, float %scale, i32 0 %t2 = insertelement <2 x float> %t1, float %scale, i32 1 %t3 = fmul <2 x float> %op, %t2 @@ -597,46 +561,44 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr %result0 = insertelement <2 x float> poison, float %t4, i32 0 %t5 = extractelement <2 x float> %t3, i32 1 %result1 = insertelement <2 x float> %result0, float %t5, i32 1 - store <2 x float> %result1, <2 x float>* %resultptr, align 8 + store <2 x float> %result1, ptr %resultptr, align 8 ret void } -define <4 x float> 
@load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) nofree nosync { +define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %l = load <2 x float>, <2 x float>* %p, align 4 + %l = load <2 x float>, ptr %p, align 4 %s = extractelement <2 x float> %l, i32 0 %r = insertelement <4 x float> poison, float %s, i32 0 ret <4 x float> %r } -define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) nofree nosync { +define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %l = load <8 x float>, <8 x float>* %p, align 4 + %l = load <8 x float>, ptr %p, align 4 %s = extractelement <8 x float> %l, i32 0 %r = insertelement <4 x float> poison, float %s, i32 0 ret <4 x float> %r } -define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) nofree nosync { +define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 dereferenceable(16) %p, ptr %store_ptr) nofree nosync { ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use( -; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4 -; CHECK-NEXT: store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4 +; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4 ; CHECK-NEXT: [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> poison, i32 [[S]], i32 0 ; CHECK-NEXT: ret <8 x i32> [[R]] ; - %l = load <1 x i32>, <1 x i32>* %p, align 4 - store <1 x i32> %l, <1 x i32>* %store_ptr + %l = load <1 x i32>, ptr %p, align 4 + store <1 x i32> %l, ptr %store_ptr %s = extractelement <1 x i32> %l, i32 0 %r = insertelement <8 x i32> poison, i32 %s, i32 0 ret <8 x i32> %r @@ -644,41 +606,40 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 ; Can't safely load the offset vector, but can load+shuffle if it is profitable. 
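; Byte math for @gep1_load_v2i16_extract_insert_v8i16 below: the gep is
; 4 bytes past %p, so an <8 x i16> load there would need bytes 4..19 with
; only 16 known dereferenceable. Loading <8 x i16> at the base is safe, and
; the wanted i16 is at byte 4, i.e. lane 2 of that load; AVX2 pays for the
; extra shuffle, SSE2 keeps the scalar load (see the split check lines).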
-define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) nofree nosync { +define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceable(16) %p) nofree nosync { ; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16( -; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1 -; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[GEP]], i32 0, i32 0 -; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[TMP1]], align 8 +; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1 +; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0 +; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8 ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; SSE2-NEXT: ret <8 x i16> [[R]] ; ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16( -; AVX2-NEXT: [[TMP1:%.*]] = bitcast <2 x i16>* [[P:%.*]] to <8 x i16>* -; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4 +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; - %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1 - %l = load <2 x i16>, <2 x i16>* %gep, align 8 + %gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1 + %l = load <2 x i16>, ptr %gep, align 8 %s = extractelement <2 x i16> %l, i32 0 %r = insertelement <8 x i16> poison, i16 %s, i64 0 ret <8 x i16> %r } ; PR30986 - split vector loads for scalarized operations -define <2 x i64> @PR30986(<2 x i64>* %0) { +define <2 x i64> @PR30986(ptr %0) { ; CHECK-LABEL: @PR30986( -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, <2 x i64>* [[TMP0:%.*]], i32 0, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16 ; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, <2 x i64>* [[TMP0]], i32 0, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1 ; CHECK-NEXT: ret <2 x i64> [[TMP9]] ; - %2 = load <2 x i64>, <2 x i64>* %0, align 16 + %2 = load <2 x i64>, ptr %0, align 16 %3 = extractelement <2 x i64> %2, i32 0 %4 = tail call i64 @llvm.ctpop.i64(i64 %3) %5 = insertelement <2 x i64> poison, i64 %4, i32 0 diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 5ad3f70..db40c84 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -4,307 +4,283 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define float @matching_fp_scalar(float* align 16 dereferenceable(16) %p) { +define float @matching_fp_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_scalar( -; CHECK-NEXT: 
[[R:%.*]] = load float, float* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %r = load float, float* %p, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_scalar_volatile(float* align 16 dereferenceable(16) %p) { +define float @matching_fp_scalar_volatile(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_scalar_volatile( -; CHECK-NEXT: [[R:%.*]] = load volatile float, float* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load volatile float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %r = load volatile float, float* %p, align 16 + %r = load volatile float, ptr %p, align 16 ret float %r } -define double @larger_fp_scalar(float* align 16 dereferenceable(16) %p) { +define double @larger_fp_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @larger_fp_scalar( -; CHECK-NEXT: [[BC:%.*]] = bitcast float* [[P:%.*]] to double* -; CHECK-NEXT: [[R:%.*]] = load double, double* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load double, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret double [[R]] ; - %bc = bitcast float* %p to double* - %r = load double, double* %bc, align 16 + %r = load double, ptr %p, align 16 ret double %r } -define float @smaller_fp_scalar(double* align 16 dereferenceable(16) %p) { +define float @smaller_fp_scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @smaller_fp_scalar( -; CHECK-NEXT: [[BC:%.*]] = bitcast double* [[P:%.*]] to float* -; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %bc = bitcast double* %p to float* - %r = load float, float* %bc, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_vector(<4 x float>* align 16 dereferenceable(16) %p) { +define float @matching_fp_vector(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_vector( -; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to float* -; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %bc = bitcast <4 x float>* %p to float* - %r = load float, float* %bc, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_vector_gep00(<4 x float>* align 16 dereferenceable(16) %p) { +define float @matching_fp_vector_gep00(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @matching_fp_vector_gep00( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 0 - %r = load float, float* %gep, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define float @matching_fp_vector_gep01(<4 x float>* align 16 dereferenceable(20) %p) { +define float @matching_fp_vector_gep01(ptr align 16 dereferenceable(20) %p) { ; CHECK-LABEL: @matching_fp_vector_gep01( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 1 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: ret float [[R]] ; 
- %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 1 - %r = load float, float* %gep, align 4 + %gep = getelementptr inbounds <4 x float>, ptr %p, i64 0, i64 1 + %r = load float, ptr %gep, align 4 ret float %r } -define float @matching_fp_vector_gep01_deref(<4 x float>* align 16 dereferenceable(19) %p) { +define float @matching_fp_vector_gep01_deref(ptr align 16 dereferenceable(19) %p) { ; CHECK-LABEL: @matching_fp_vector_gep01_deref( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 1 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 1 - %r = load float, float* %gep, align 4 + %gep = getelementptr inbounds <4 x float>, ptr %p, i64 0, i64 1 + %r = load float, ptr %gep, align 4 ret float %r } -define float @matching_fp_vector_gep10(<4 x float>* align 16 dereferenceable(32) %p) { +define float @matching_fp_vector_gep10(ptr align 16 dereferenceable(32) %p) { ; CHECK-LABEL: @matching_fp_vector_gep10( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 1, i64 0 - %r = load float, float* %gep, align 16 + %gep = getelementptr inbounds <4 x float>, ptr %p, i64 1, i64 0 + %r = load float, ptr %gep, align 16 ret float %r } -define float @matching_fp_vector_gep10_deref(<4 x float>* align 16 dereferenceable(31) %p) { +define float @matching_fp_vector_gep10_deref(ptr align 16 dereferenceable(31) %p) { ; CHECK-LABEL: @matching_fp_vector_gep10_deref( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 1, i64 0 -; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[GEP]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 1, i64 0 - %r = load float, float* %gep, align 16 + %gep = getelementptr inbounds <4 x float>, ptr %p, i64 1, i64 0 + %r = load float, ptr %gep, align 16 ret float %r } -define float @nonmatching_int_vector(<2 x i64>* align 16 dereferenceable(16) %p) { +define float @nonmatching_int_vector(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @nonmatching_int_vector( -; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64>* [[P:%.*]] to float* -; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret float [[R]] ; - %bc = bitcast <2 x i64>* %p to float* - %r = load float, float* %bc, align 16 + %r = load float, ptr %p, align 16 ret float %r } -define double @less_aligned(double* align 4 dereferenceable(16) %p) { +define double @less_aligned(ptr align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @less_aligned( -; CHECK-NEXT: [[R:%.*]] = load double, double* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = load double, ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret double [[R]] ; - %r = load 
double, double* %p, align 4
+ %r = load double, ptr %p, align 4
  ret double %r
 }

-define float @matching_fp_scalar_small_deref(float* align 16 dereferenceable(15) %p) {
+define float @matching_fp_scalar_small_deref(ptr align 16 dereferenceable(15) %p) {
 ; CHECK-LABEL: @matching_fp_scalar_small_deref(
-; CHECK-NEXT: [[R:%.*]] = load float, float* [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = load float, ptr [[P:%.*]], align 16
 ; CHECK-NEXT: ret float [[R]]
 ;
- %r = load float, float* %p, align 16
+ %r = load float, ptr %p, align 16
  ret float %r
 }

-define i64 @larger_int_scalar(<4 x float>* align 16 dereferenceable(16) %p) {
+define i64 @larger_int_scalar(ptr align 16 dereferenceable(16) %p) {
 ; CHECK-LABEL: @larger_int_scalar(
-; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i64*
-; CHECK-NEXT: [[R:%.*]] = load i64, i64* [[BC]], align 16
+; CHECK-NEXT: [[R:%.*]] = load i64, ptr [[P:%.*]], align 16
 ; CHECK-NEXT: ret i64 [[R]]
 ;
- %bc = bitcast <4 x float>* %p to i64*
- %r = load i64, i64* %bc, align 16
+ %r = load i64, ptr %p, align 16
  ret i64 %r
 }

-define i8 @smaller_int_scalar(<4 x float>* align 16 dereferenceable(16) %p) {
+define i8 @smaller_int_scalar(ptr align 16 dereferenceable(16) %p) {
 ; CHECK-LABEL: @smaller_int_scalar(
-; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i8*
-; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[BC]], align 16
+; CHECK-NEXT: [[R:%.*]] = load i8, ptr [[P:%.*]], align 16
 ; CHECK-NEXT: ret i8 [[R]]
 ;
- %bc = bitcast <4 x float>* %p to i8*
- %r = load i8, i8* %bc, align 16
+ %r = load i8, ptr %p, align 16
  ret i8 %r
 }

-define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable(32) %p) {
+define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p) {
 ; CHECK-LABEL: @larger_fp_scalar_256bit_vec(
-; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x float>* [[P:%.*]] to double*
-; CHECK-NEXT: [[R:%.*]] = load double, double* [[BC]], align 32
+; CHECK-NEXT: [[R:%.*]] = load double, ptr [[P:%.*]], align 32
 ; CHECK-NEXT: ret double [[R]]
 ;
- %bc = bitcast <8 x float>* %p to double*
- %r = load double, double* %bc, align 32
+ %r = load double, ptr %p, align 32
  ret double %r
 }

-define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %s = load float, float* %p, align 4
+ %s = load float, ptr %p, align 4
  %r = insertelement <4 x float> undef, float %s, i32 0
  ret <4 x float> %r
 }

-define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %b = bitcast <4 x float>* %p to float*
- %s = load float, float* %b, align 4
+ %s = load float, ptr %p, align 4
  %r = insertelement <4 x float> undef, float %s, i32 0
  ret <4 x float> %r
 }

 ; Element type does not change cost.

-define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
+define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v4i32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x i32> [[R]]
 ;
- %s = load i32, i32* %p, align 4
+ %s = load i32, ptr %p, align 4
  %r = insertelement <4 x i32> undef, i32 %s, i32 0
  ret <4 x i32> %r
 }

 ; Pointer type does not change cost.

-define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) nofree nosync {
+define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x i32> [[R]]
 ;
- %b = bitcast <16 x i8>* %p to i32*
- %s = load i32, i32* %b, align 4
+ %s = load i32, ptr %p, align 4
  %r = insertelement <4 x i32> undef, i32 %s, i32 0
  ret <4 x i32> %r
 }

 ; This is canonical form for vector element access.

-define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 0
- %s = load float, float* %gep, align 16
+ %s = load float, ptr %p, align 16
  %r = insertelement <4 x float> undef, float %s, i64 0
  ret <4 x float> %r
 }

 ; Should work with addrspace as well.

-define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(44)* %p, i64 0, i64 0
- %s = load float, float addrspace(44)* %gep, align 16
+ %s = load float, ptr addrspace(44) %p, align 16
  %r = insertelement <4 x float> undef, float %s, i64 0
  ret <4 x float> %r
 }

 ; If there are enough dereferenceable bytes, we can offset the vector load.

-define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync {
+define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync {
 ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1
- %s = load i16, i16* %gep, align 2
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
+ %s = load i16, ptr %gep, align 2
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }

 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.

-define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) nofree nosync {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(17) %p) nofree nosync {
 ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
-; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
-; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2
+; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
+; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
 ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; SSE2-NEXT: ret <8 x i16> [[R]]
 ;
 ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
-; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 16
+; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
 ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; AVX2-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1
- %s = load i16, i16* %gep, align 2
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
+ %s = load i16, ptr %gep, align 2
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }

 ; Verify that alignment of the new load is not over-specified.

-define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) nofree nosync {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 dereferenceable(16) %p) nofree nosync {
 ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
-; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
-; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 8
+; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
+; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
 ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; SSE2-NEXT: ret <8 x i16> [[R]]
 ;
 ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
-; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 2
+; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
 ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; AVX2-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1
- %s = load i16, i16* %gep, align 8
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
+ %s = load i16, ptr %gep, align 8
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }
@@ -313,31 +289,27 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2
 ; must be a multiple of element size.
 ; TODO: Could bitcast around this limitation.

-define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) {
+define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
 ; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 1
-; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
-; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
+; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
 ; CHECK-NEXT: ret <4 x i32> [[R]]
 ;
- %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 1
- %b = bitcast i8* %gep to i32*
- %s = load i32, i32* %b, align 1
+ %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 1
+ %s = load i32, ptr %gep, align 1
  %r = insertelement <4 x i32> undef, i32 %s, i64 0
  ret <4 x i32> %r
 }

-define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
+define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x i32> [[R]]
 ;
- %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12
- %b = bitcast i8* %gep to i32*
- %s = load i32, i32* %b, align 1
+ %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12
+ %s = load i32, ptr %gep, align 1
  %r = insertelement <4 x i32> undef, i32 %s, i64 0
  ret <4 x i32> %r
 }
@@ -346,33 +318,30 @@ define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe
 ; must be a multiple of element size and the offset must be low enough to fit in the vector
 ; (bitcasting would not help this case).

-define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
+define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13
-; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
-; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 13
+; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
 ; CHECK-NEXT: ret <4 x i32> [[R]]
 ;
- %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 13
- %b = bitcast i8* %gep to i32*
- %s = load i32, i32* %b, align 1
+ %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 13
+ %s = load i32, ptr %gep, align 1
  %r = insertelement <4 x i32> undef, i32 %s, i64 0
  ret <4 x i32> %r
 }

 ; If there are enough dereferenceable bytes, we can offset the vector load.

-define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) nofree nosync {
+define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
- %s = load i16, i16* %gep, align 16
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
+ %s = load i16, ptr %gep, align 16
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }
@@ -380,43 +349,43 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl
 ; Negative test - disable under asan because widened load can cause spurious
 ; use-after-poison issues when __asan_poison_memory_region is used.

-define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address nofree nosync {
+define <8 x i16> @gep10_load_i16_insert_v8i16_asan(ptr align 16 dereferenceable(32) %p) sanitize_address nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
-; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16
 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
- %s = load i16, i16* %gep, align 16
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
+ %s = load i16, ptr %gep, align 16
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }

 ; hwasan and memtag should be similarly suppressed.

-define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress nofree nosync {
+define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(ptr align 16 dereferenceable(32) %p) sanitize_hwaddress nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_hwasan(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
-; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16
 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
- %s = load i16, i16* %gep, align 16
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
+ %s = load i16, ptr %gep, align 16
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }

-define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag nofree nosync {
+define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(ptr align 16 dereferenceable(32) %p) sanitize_memtag nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_memtag(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
-; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16
 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
- %s = load i16, i16* %gep, align 16
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
+ %s = load i16, ptr %gep, align 16
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }
@@ -424,43 +393,43 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 derefer
 ; Negative test - disable under tsan because widened load may overlap bytes
 ; being concurrently modified. tsan does not know that some bytes are undef.

-define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread nofree nosync {
+define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(ptr align 16 dereferenceable(32) %p) sanitize_thread nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
-; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16
 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
- %s = load i16, i16* %gep, align 16
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
+ %s = load i16, ptr %gep, align 16
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }

 ; Negative test - can't safely load the offset vector, but could load+shuffle.

-define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) nofree nosync {
+define <8 x i16> @gep10_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(31) %p) nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
-; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16
 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
- %s = load i16, i16* %gep, align 16
+ %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
+ %s = load i16, ptr %gep, align 16
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }

 ; Negative test - do not alter volatile.

-define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_volatile(
-; CHECK-NEXT: [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4
+; CHECK-NEXT: [[S:%.*]] = load volatile float, ptr [[P:%.*]], align 4
 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %s = load volatile float, float* %p, align 4
+ %s = load volatile float, ptr %p, align 4
  %r = insertelement <4 x float> undef, float %s, i32 0
  ret <4 x float> %r
 }
@@ -468,98 +437,93 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab
 ; Pointer is not as aligned as load, but that's ok.
 ; The new load uses the larger alignment value.

-define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_align(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %s = load float, float* %p, align 4
+ %s = load float, ptr %p, align 4
  %r = insertelement <4 x float> undef, float %s, i32 0
  ret <4 x float> %r
 }

 ; Negative test - not enough bytes.

-define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) nofree nosync {
+define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_deref(
-; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
+; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %s = load float, float* %p, align 4
+ %s = load float, ptr %p, align 4
  %r = insertelement <4 x float> undef, float %s, i32 0
  ret <4 x float> %r
 }

-define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
+define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v8i32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <8 x i32> [[R]]
 ;
- %s = load i32, i32* %p, align 4
+ %s = load i32, ptr %p, align 4
  %r = insertelement <8 x i32> undef, i32 %s, i32 0
  ret <8 x i32> %r
 }

-define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) nofree nosync {
+define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v8i32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <8 x i32> [[R]]
 ;
- %b = bitcast <4 x i32>* %p to i32*
- %s = load i32, i32* %b, align 4
+ %s = load i32, ptr %p, align 4
  %r = insertelement <8 x i32> undef, i32 %s, i32 0
  ret <8 x i32> %r
 }

-define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) nofree nosync {
+define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v16f32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <16 x float> [[R]]
 ;
- %s = load float, float* %p, align 4
+ %s = load float, ptr %p, align 4
  %r = insertelement <16 x float> undef, float %s, i32 0
  ret <16 x float> %r
 }

-define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) nofree nosync {
+define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v2f32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT: ret <2 x float> [[R]]
 ;
- %s = load float, float* %p, align 4
+ %s = load float, ptr %p, align 4
  %r = insertelement <2 x float> undef, float %s, i32 0
  ret <2 x float> %r
 }

 ; Negative test - suppress load widening for asan/hwasan/memtag/tsan.

-define <2 x float> @load_f32_insert_v2f32_asan(float* align 16 dereferenceable(16) %p) sanitize_address {
+define <2 x float> @load_f32_insert_v2f32_asan(ptr align 16 dereferenceable(16) %p) sanitize_address {
 ; CHECK-LABEL: @load_f32_insert_v2f32_asan(
-; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
+; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0
 ; CHECK-NEXT: ret <2 x float> [[R]]
 ;
- %s = load float, float* %p, align 4
+ %s = load float, ptr %p, align 4
  %r = insertelement <2 x float> undef, float %s, i32 0
  ret <2 x float> %r
 }

-declare float* @getscaleptr()
-define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr, <2 x float>* nocapture nonnull readonly %opptr) {
+declare ptr @getscaleptr()
+define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr nocapture nonnull readonly %opptr) {
 ; CHECK-LABEL: @PR47558_multiple_use_load(
-; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) float* @getscaleptr()
-; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, <2 x float>* [[OPPTR:%.*]], align 4
-; CHECK-NEXT: [[SCALE:%.*]] = load float, float* [[SCALEPTR]], align 16
+; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
+; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
+; CHECK-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
 ; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x float> undef, float [[SCALE]], i32 0
 ; CHECK-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
 ; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
@@ -567,12 +531,12 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr
 ; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> undef, float [[T4]], i32 0
 ; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
 ; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
-; CHECK-NEXT: store <2 x float> [[RESULT1]], <2 x float>* [[RESULTPTR:%.*]], align 8
+; CHECK-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
 ; CHECK-NEXT: ret void
 ;
- %scaleptr = tail call nonnull align 16 dereferenceable(64) float* @getscaleptr()
- %op = load <2 x float>, <2 x float>* %opptr, align 4
- %scale = load float, float* %scaleptr, align 16
+ %scaleptr = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
+ %op = load <2 x float>, ptr %opptr, align 4
+ %scale = load float, ptr %scaleptr, align 16
  %t1 = insertelement <2 x float> undef, float %scale, i32 0
  %t2 = insertelement <2 x float> %t1, float %scale, i32 1
  %t3 = fmul <2 x float> %op, %t2
@@ -580,46 +544,44 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr
  %result0 = insertelement <2 x float> undef, float %t4, i32 0
  %t5 = extractelement <2 x float> %t3, i32 1
  %result1 = insertelement <2 x float> %result0, float %t5, i32 1
- store <2 x float> %result1, <2 x float>* %resultptr, align 8
+ store <2 x float> %result1, ptr %resultptr, align 8
  ret void
 }

-define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %l = load <2 x float>, <2 x float>* %p, align 4
+ %l = load <2 x float>, ptr %p, align 4
  %s = extractelement <2 x float> %l, i32 0
  %r = insertelement <4 x float> undef, float %s, i32 0
  ret <4 x float> %r
 }

-define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) nofree nosync {
+define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
- %l = load <8 x float>, <8 x float>* %p, align 4
+ %l = load <8 x float>, ptr %p, align 4
  %s = extractelement <8 x float> %l, i32 0
  %r = insertelement <4 x float> undef, float %s, i32 0
  ret <4 x float> %r
 }

-define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) nofree nosync {
+define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 dereferenceable(16) %p, ptr %store_ptr) nofree nosync {
 ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
-; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4
-; CHECK-NEXT: store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4
+; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
+; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
 ; CHECK-NEXT: [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0
 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
 ; CHECK-NEXT: ret <8 x i32> [[R]]
 ;
- %l = load <1 x i32>, <1 x i32>* %p, align 4
- store <1 x i32> %l, <1 x i32>* %store_ptr
+ %l = load <1 x i32>, ptr %p, align 4
+ store <1 x i32> %l, ptr %store_ptr
  %s = extractelement <1 x i32> %l, i32 0
  %r = insertelement <8 x i32> undef, i32 %s, i32 0
  ret <8 x i32> %r
@@ -627,41 +589,40 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16
 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.

-define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) nofree nosync {
+define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceable(16) %p) nofree nosync {
 ; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
-; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1
-; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[GEP]], i32 0, i32 0
-; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[TMP1]], align 8
+; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
+; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
+; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
 ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; SSE2-NEXT: ret <8 x i16> [[R]]
 ;
 ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
-; AVX2-NEXT: [[TMP1:%.*]] = bitcast <2 x i16>* [[P:%.*]] to <8 x i16>*
-; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4
-; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> <i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
+; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; AVX2-NEXT: ret <8 x i16> [[R]]
 ;
- %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1
- %l = load <2 x i16>, <2 x i16>* %gep, align 8
+ %gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1
+ %l = load <2 x i16>, ptr %gep, align 8
  %s = extractelement <2 x i16> %l, i32 0
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
 }

 ; PR30986 - split vector loads for scalarized operations

-define <2 x i64> @PR30986(<2 x i64>* %0) {
+define <2 x i64> @PR30986(ptr %0) {
 ; CHECK-LABEL: @PR30986(
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, <2 x i64>* [[TMP0:%.*]], i32 0, i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16
 ; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]])
 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, <2 x i64>* [[TMP0]], i32 0, i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]])
 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1
 ; CHECK-NEXT: ret <2 x i64> [[TMP9]]
 ;
- %2 = load <2 x i64>, <2 x i64>* %0, align 16
+ %2 = load <2 x i64>, ptr %0, align 16
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = tail call i64 @llvm.ctpop.i64(i64 %3)
  %5 = insertelement <2 x i64> undef, i64 %4, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
index 613ede9..225fa3c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
@@ -167,14 +167,14 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {

 ; negative test - load prevents the transform

-define <2 x i1> @constant_op1_i64_load(i64* %p) {
+define <2 x i1> @constant_op1_i64_load(ptr %p) {
 ; CHECK-LABEL: @constant_op1_i64_load(
-; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4
+; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4
 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
 ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
 ; CHECK-NEXT: ret <2 x i1> [[R]]
 ;
- %ld = load i64, i64* %p
+ %ld = load i64, ptr %p
  %ins = insertelement <2 x i64> poison, i64 %ld, i32 0
  %r = icmp eq <2 x i64> %ins, <i64 42, i64 -42>
  ret <2 x i1> %r
@@ -278,13 +278,13 @@ define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
  ret <4 x float> %r
 }

-define <4 x i1> @vector_of_pointers(i32* %t1) {
+define <4 x i1> @vector_of_pointers(ptr %t1) {
 ; CHECK-LABEL: @vector_of_pointers(
-; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne i32* [[T1:%.*]], null
+; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
 ; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> poison, i1 [[T6_SCALAR]], i64 0
 ; CHECK-NEXT: ret <4 x i1> [[T6]]
 ;
- %t5 = insertelement <4 x i32*> poison, i32* %t1, i32 0
- %t6 = icmp ne <4 x i32*> %t5, zeroinitializer
+ %t5 = insertelement <4 x ptr> poison, ptr %t1, i32 0
+ %t6 = icmp ne <4 x ptr> %t5, zeroinitializer
  ret <4 x i1> %t6
 }
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
index 6cb10fb..11f85b2 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
@@ -167,14 +167,14 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {

 ; negative test - load prevents the transform

-define <2 x i1> @constant_op1_i64_load(i64* %p) {
+define <2 x i1> @constant_op1_i64_load(ptr %p) {
 ; CHECK-LABEL: @constant_op1_i64_load(
-; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4
+; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 4
 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
 ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
 ; CHECK-NEXT: ret <2 x i1> [[R]]
 ;
- %ld = load i64, i64* %p
+ %ld = load i64, ptr %p
  %ins = insertelement <2 x i64> undef, i64 %ld, i32 0
  %r = icmp eq <2 x i64> %ins, <i64 42, i64 -42>
  ret <2 x i1> %r
@@ -278,13 +278,13 @@ define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
  ret <4 x float> %r
 }

-define <4 x i1> @vector_of_pointers(i32* %t1) {
+define <4 x i1> @vector_of_pointers(ptr %t1) {
 ; CHECK-LABEL: @vector_of_pointers(
-; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne i32* [[T1:%.*]], null
+; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
 ; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
 ; CHECK-NEXT: ret <4 x i1> [[T6]]
 ;
- %t5 = insertelement <4 x i32*> undef, i32* %t1, i32 0
- %t6 = icmp ne <4 x i32*> %t5, zeroinitializer
+ %t5 = insertelement <4 x ptr> undef, ptr %t1, i32 0
+ %t6 = icmp ne <4 x ptr> %t5, zeroinitializer
  ret <4 x i1> %t6
 }
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-vector-gep.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-vector-gep.ll
index 9f99113..e227e99 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-vector-gep.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-vector-gep.ll
@@ -4,822 +4,822 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

-declare void @use(i64*)
+declare void @use(ptr)

 ;-------------------------------------------------------------------------------

-define void @both_operands_need_extraction.2elts(<2 x i64*> %baseptrs, <2 x i64> %indices) {
+define void @both_operands_need_extraction.2elts(<2 x ptr> %baseptrs, <2 x i64> %indices) {
 ; CHECK-LABEL: @both_operands_need_extraction.2elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x i64*> [[BASEPTRS:%.*]], <2 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x ptr> [[BASEPTRS:%.*]], <2 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, <2 x i64*> %baseptrs, <2 x i64> %indices
+ %ptrs = getelementptr inbounds i64, <2 x ptr> %baseptrs, <2 x i64> %indices

- %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

  ret void
 }

-define void @both_operands_need_extraction.3elts(<3 x i64*> %baseptrs, <3 x i64> %indices) {
+define void @both_operands_need_extraction.3elts(<3 x ptr> %baseptrs, <3 x i64> %indices) {
 ; CHECK-LABEL: @both_operands_need_extraction.3elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS:%.*]], <3 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS:%.*]], <3 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs, <3 x i64> %indices
+ %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs, <3 x i64> %indices

- %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

  ret void
 }

-define void @both_operands_need_extraction.4elts(<4 x i64*> %baseptrs, <4 x i64> %indices) {
+define void @both_operands_need_extraction.4elts(<4 x ptr> %baseptrs, <4 x i64> %indices) {
 ; CHECK-LABEL: @both_operands_need_extraction.4elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x i64*> [[BASEPTRS:%.*]], <4 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
-; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 3
-; CHECK-NEXT: call void @use(i64* [[PTR_3]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x ptr> [[BASEPTRS:%.*]], <4 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
+; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 3
+; CHECK-NEXT: call void @use(ptr [[PTR_3]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, <4 x i64*> %baseptrs, <4 x i64> %indices
+ %ptrs = getelementptr inbounds i64, <4 x ptr> %baseptrs, <4 x i64> %indices

- %ptr.0 = extractelement <4 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <4 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <4 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <4 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <4 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <4 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

- %ptr.3 = extractelement <4 x i64*> %ptrs, i64 3
- call void @use(i64* %ptr.3)
+ %ptr.3 = extractelement <4 x ptr> %ptrs, i64 3
+ call void @use(ptr %ptr.3)

  ret void
 }

 ;-------------------------------------------------------------------------------

-define void @indicies_need_extraction.2elts(i64* %baseptr, <2 x i64> %indices) {
+define void @indicies_need_extraction.2elts(ptr %baseptr, <2 x i64> %indices) {
 ; CHECK-LABEL: @indicies_need_extraction.2elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, i64* [[BASEPTR:%.*]], <2 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, ptr [[BASEPTR:%.*]], <2 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, i64* %baseptr, <2 x i64> %indices
+ %ptrs = getelementptr inbounds i64, ptr %baseptr, <2 x i64> %indices

- %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

  ret void
 }

-define void @indicies_need_extraction.3elts(i64* %baseptr, <3 x i64> %indices) {
+define void @indicies_need_extraction.3elts(ptr %baseptr, <3 x i64> %indices) {
 ; CHECK-LABEL: @indicies_need_extraction.3elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, i64* [[BASEPTR:%.*]], <3 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, ptr [[BASEPTR:%.*]], <3 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, i64* %baseptr, <3 x i64> %indices
+ %ptrs = getelementptr inbounds i64, ptr %baseptr, <3 x i64> %indices

- %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

  ret void
 }

-define void @indicies_need_extraction.4elts(i64* %baseptr, <4 x i64> %indices) {
+define void @indicies_need_extraction.4elts(ptr %baseptr, <4 x i64> %indices) {
 ; CHECK-LABEL: @indicies_need_extraction.4elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, i64* [[BASEPTR:%.*]], <4 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
-; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 3
-; CHECK-NEXT: call void @use(i64* [[PTR_3]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, ptr [[BASEPTR:%.*]], <4 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
+; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 3
+; CHECK-NEXT: call void @use(ptr [[PTR_3]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, i64* %baseptr, <4 x i64> %indices
+ %ptrs = getelementptr inbounds i64, ptr %baseptr, <4 x i64> %indices

- %ptr.0 = extractelement <4 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <4 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <4 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <4 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <4 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <4 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

- %ptr.3 = extractelement <4 x i64*> %ptrs, i64 3
- call void @use(i64* %ptr.3)
+ %ptr.3 = extractelement <4 x ptr> %ptrs, i64 3
+ call void @use(ptr %ptr.3)

  ret void
 }

 ;-------------------------------------------------------------------------------

-define void @baseptrs_need_extraction.2elts(<2 x i64*> %baseptrs, i64 %indice) {
+define void @baseptrs_need_extraction.2elts(<2 x ptr> %baseptrs, i64 %indice) {
 ; CHECK-LABEL: @baseptrs_need_extraction.2elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x i64*> [[BASEPTRS:%.*]], i64 [[INDICE:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x ptr> [[BASEPTRS:%.*]], i64 [[INDICE:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, <2 x i64*> %baseptrs, i64 %indice
+ %ptrs = getelementptr inbounds i64, <2 x ptr> %baseptrs, i64 %indice

- %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

  ret void
 }

-define void @baseptrs_need_extraction.3elts(<3 x i64*> %baseptrs, i64 %indice) {
+define void @baseptrs_need_extraction.3elts(<3 x ptr> %baseptrs, i64 %indice) {
 ; CHECK-LABEL: @baseptrs_need_extraction.3elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS:%.*]], i64 [[INDICE:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS:%.*]], i64 [[INDICE:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs, i64 %indice
+ %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs, i64 %indice

- %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

  ret void
 }

-define void @baseptrs_need_extraction.4elts(<4 x i64*> %baseptrs, i64 %indice) {
+define void @baseptrs_need_extraction.4elts(<4 x ptr> %baseptrs, i64 %indice) {
 ; CHECK-LABEL: @baseptrs_need_extraction.4elts(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x i64*> [[BASEPTRS:%.*]], i64 [[INDICE:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
-; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 3
-; CHECK-NEXT: call void @use(i64* [[PTR_3]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x ptr> [[BASEPTRS:%.*]], i64 [[INDICE:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
+; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 3
+; CHECK-NEXT: call void @use(ptr [[PTR_3]])
 ; CHECK-NEXT: ret void
 ;
- %ptrs = getelementptr inbounds i64, <4 x i64*> %baseptrs, i64 %indice
+ %ptrs = getelementptr inbounds i64, <4 x ptr> %baseptrs, i64 %indice

- %ptr.0 = extractelement <4 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <4 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <4 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <4 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <4 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <4 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

- %ptr.3 = extractelement <4 x i64*> %ptrs, i64 3
- call void @use(i64* %ptr.3)
+ %ptr.3 = extractelement <4 x ptr> %ptrs, i64 3
+ call void @use(ptr %ptr.3)

  ret void
 }

 ;-------------------------------------------------------------------------------

-define void @first_baseptr_is_known.2elts(<2 x i64*> %baseptrs, i64* %second_baseptr, <2 x i64> %indices) {
+define void @first_baseptr_is_known.2elts(<2 x ptr> %baseptrs, ptr %second_baseptr, <2 x i64> %indices) {
 ; CHECK-LABEL: @first_baseptr_is_known.2elts(
-; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x i64*> [[BASEPTRS_NEW]], <2 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
+; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x ptr> [[BASEPTRS_NEW]], <2 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
 ; CHECK-NEXT: ret void
 ;
- %baseptrs.new = insertelement <2 x i64*> %baseptrs, i64* %second_baseptr, i64 0
- %ptrs = getelementptr inbounds i64, <2 x i64*> %baseptrs.new, <2 x i64> %indices
+ %baseptrs.new = insertelement <2 x ptr> %baseptrs, ptr %second_baseptr, i64 0
+ %ptrs = getelementptr inbounds i64, <2 x ptr> %baseptrs.new, <2 x i64> %indices

- %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

  ret void
 }

-define void @first_indice_is_known.2elts(<2 x i64*> %baseptrs, <2 x i64> %indices, i64 %second_indice) {
+define void @first_indice_is_known.2elts(<2 x ptr> %baseptrs, <2 x i64> %indices, i64 %second_indice) {
 ; CHECK-LABEL: @first_indice_is_known.2elts(
 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <2 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x i64*> [[BASEPTRS:%.*]], <2 x i64> [[INDICES_NEW]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x ptr> [[BASEPTRS:%.*]], <2 x i64> [[INDICES_NEW]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
 ; CHECK-NEXT: ret void
 ;
  %indices.new = insertelement <2 x i64> %indices, i64 %second_indice, i64 0
- %ptrs = getelementptr inbounds i64, <2 x i64*> %baseptrs, <2 x i64> %indices.new
+ %ptrs = getelementptr inbounds i64, <2 x ptr> %baseptrs, <2 x i64> %indices.new

- %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

  ret void
 }

-define void @first_indice_and_baseptr_are_known.2elts(<2 x i64*> %baseptrs, i64* %second_baseptr, <2 x i64> %indices, i64 %second_indice) {
+define void @first_indice_and_baseptr_are_known.2elts(<2 x ptr> %baseptrs, ptr %second_baseptr, <2 x i64> %indices, i64 %second_indice) {
 ; CHECK-LABEL: @first_indice_and_baseptr_are_known.2elts(
-; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0
+; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0
 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <2 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x i64*> [[BASEPTRS_NEW]], <2 x i64> [[INDICES_NEW]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x ptr> [[BASEPTRS_NEW]], <2 x i64> [[INDICES_NEW]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
 ; CHECK-NEXT: ret void
 ;
- %baseptrs.new = insertelement <2 x i64*> %baseptrs, i64* %second_baseptr, i64 0
+ %baseptrs.new = insertelement <2 x ptr> %baseptrs, ptr %second_baseptr, i64 0
  %indices.new = insertelement <2 x i64> %indices, i64 %second_indice, i64 0
- %ptrs = getelementptr inbounds i64, <2 x i64*> %baseptrs.new, <2 x i64> %indices.new
+ %ptrs = getelementptr inbounds i64, <2 x ptr> %baseptrs.new, <2 x i64> %indices.new

- %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

  ret void
 }

 ;-------------------------------------------------------------------------------

-define void @first_baseptr_is_known.3elts(<3 x i64*> %baseptrs, i64* %second_baseptr, <3 x i64> %indices) {
+define void @first_baseptr_is_known.3elts(<3 x ptr> %baseptrs, ptr %second_baseptr, <3 x i64> %indices) {
 ; CHECK-LABEL: @first_baseptr_is_known.3elts(
-; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
+; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
 ; CHECK-NEXT: ret void
 ;
- %baseptrs.new = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0
- %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices
+ %baseptrs.new = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0
+ %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices

- %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

  ret void
 }

-define void @first_indice_is_known.3elts(<3 x i64*> %baseptrs, <3 x i64> %indices, i64 %second_indice) {
+define void @first_indice_is_known.3elts(<3 x ptr> %baseptrs, <3 x i64> %indices, i64 %second_indice) {
 ; CHECK-LABEL: @first_indice_is_known.3elts(
 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
 ; CHECK-NEXT: ret void
 ;
  %indices.new = insertelement <3 x i64> %indices, i64 %second_indice, i64 0
- %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs, <3 x i64> %indices.new
+ %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs, <3 x i64> %indices.new

- %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

  ret void
 }

-define void @first_indice_and_baseptr_are_known.3elts(<3 x i64*> %baseptrs, i64* %second_baseptr, <3 x i64> %indices, i64 %second_indice) {
+define void @first_indice_and_baseptr_are_known.3elts(<3 x ptr> %baseptrs, ptr %second_baseptr, <3 x i64> %indices, i64 %second_indice) {
 ; CHECK-LABEL: @first_indice_and_baseptr_are_known.3elts(
-; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0
+; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0
 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
 ; CHECK-NEXT: ret void
 ;
- %baseptrs.new = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0
+ %baseptrs.new = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0
  %indices.new = insertelement <3 x i64> %indices, i64 %second_indice, i64 0
- %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices.new
+ %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices.new

- %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1
- call void @use(i64* %ptr.1)
+ %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1
+ call void @use(ptr %ptr.1)

- %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2
- call void @use(i64* %ptr.2)
+ %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2
+ call void @use(ptr %ptr.2)

  ret void
 }

 ;-------------------------------------------------------------------------------

-define void @first_two_baseptrs_is_known.3elts(<3 x i64*> %baseptrs, i64* %second_baseptr, i64* %third_baseptr, <3 x i64> %indices) {
+define void @first_two_baseptrs_is_known.3elts(<3 x ptr> %baseptrs, ptr %second_baseptr, ptr %third_baseptr, <3 x i64> %indices) {
 ; CHECK-LABEL: @first_two_baseptrs_is_known.3elts(
-; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0
-; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS_NEW_TMP]], i64* [[THIRD_BASEPTR:%.*]], i64 1
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]]
-; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0
-; CHECK-NEXT: call void @use(i64* [[PTR_0]])
-; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1
-; CHECK-NEXT: call void @use(i64* [[PTR_1]])
-; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2
-; CHECK-NEXT: call void @use(i64* [[PTR_2]])
+; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0
+; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS_NEW_TMP]], ptr [[THIRD_BASEPTR:%.*]], i64 1
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]]
+; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0
+; CHECK-NEXT: call void @use(ptr [[PTR_0]])
+; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1
+; CHECK-NEXT: call void @use(ptr [[PTR_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2
+; CHECK-NEXT: call void @use(ptr [[PTR_2]])
 ; CHECK-NEXT: ret void
 ;
- %baseptrs.new.tmp = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0
- %baseptrs.new = insertelement <3 x i64*> %baseptrs.new.tmp, i64* %third_baseptr, i64 1
- %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices
+ %baseptrs.new.tmp = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0
+ %baseptrs.new = insertelement <3 x ptr> %baseptrs.new.tmp, ptr %third_baseptr, i64 1
+ %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices

- %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0
- call void @use(i64* %ptr.0)
+ %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0
+ call void @use(ptr %ptr.0)

- %ptr.1 = extractelement <3 x
i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } -define void @first_two_indices_is_known.3elts(<3 x i64*> %baseptrs, <3 x i64> %indices, i64 %second_indice, i64 %third_indice) { +define void @first_two_indices_is_known.3elts(<3 x ptr> %baseptrs, <3 x i64> %indices, i64 %second_indice, i64 %third_indice) { ; CHECK-LABEL: @first_two_indices_is_known.3elts( ; CHECK-NEXT: [[INDICES_NEW_TMP:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES_NEW_TMP]], i64 [[THIRD_INDICE:%.*]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; %indices.new.tmp = insertelement <3 x i64> %indices, i64 %second_indice, i64 0 %indices.new = insertelement <3 x i64> %indices.new.tmp, i64 %third_indice, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs, <3 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs, <3 x i64> %indices.new - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } -define void @first_two_indices_and_first_two_baseptrs_are_known.3elts(<3 x i64*> %baseptrs, i64* %second_baseptr, i64* %third_baseptr, <3 x i64> %indices, i64 %second_indice, i64 %third_indice) { +define void @first_two_indices_and_first_two_baseptrs_are_known.3elts(<3 x ptr> %baseptrs, ptr %second_baseptr, ptr %third_baseptr, <3 x i64> %indices, i64 %second_indice, i64 %third_indice) { ; CHECK-LABEL: @first_two_indices_and_first_two_baseptrs_are_known.3elts( -; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS_NEW_TMP]], i64* [[THIRD_BASEPTR:%.*]], i64 1 +; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS_NEW_TMP]], ptr [[THIRD_BASEPTR:%.*]], i64 1 ; CHECK-NEXT: 
[[INDICES_NEW_TMP:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES_NEW_TMP]], i64 [[THIRD_INDICE:%.*]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; - %baseptrs.new.tmp = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0 - %baseptrs.new = insertelement <3 x i64*> %baseptrs.new.tmp, i64* %third_baseptr, i64 1 + %baseptrs.new.tmp = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0 + %baseptrs.new = insertelement <3 x ptr> %baseptrs.new.tmp, ptr %third_baseptr, i64 1 %indices.new.tmp = insertelement <3 x i64> %indices, i64 %second_indice, i64 0 %indices.new = insertelement <3 x i64> %indices.new.tmp, i64 %third_indice, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices.new - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } ;------------------------------------------------------------------------------- -define void @first_two_baseptrs_is_knownequal.3elts(<3 x i64*> %baseptrs, i64* %second_baseptr, <3 x i64> %indices) { +define void @first_two_baseptrs_is_knownequal.3elts(<3 x ptr> %baseptrs, ptr %second_baseptr, <3 x i64> %indices) { ; CHECK-LABEL: @first_two_baseptrs_is_knownequal.3elts( -; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS_NEW_TMP]], i64* [[SECOND_BASEPTR]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: 
[[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS_NEW_TMP]], ptr [[SECOND_BASEPTR]], i64 1 +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; - %baseptrs.new.tmp = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0 - %baseptrs.new = insertelement <3 x i64*> %baseptrs.new.tmp, i64* %second_baseptr, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices + %baseptrs.new.tmp = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0 + %baseptrs.new = insertelement <3 x ptr> %baseptrs.new.tmp, ptr %second_baseptr, i64 1 + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } -define void @first_two_indices_is_knownequal.3elts(<3 x i64*> %baseptrs, <3 x i64> %indices, i64 %second_indice) { +define void @first_two_indices_is_knownequal.3elts(<3 x ptr> %baseptrs, <3 x i64> %indices, i64 %second_indice) { ; CHECK-LABEL: @first_two_indices_is_knownequal.3elts( ; CHECK-NEXT: [[INDICES_NEW_TMP:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES_NEW_TMP]], i64 [[SECOND_INDICE]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; %indices.new.tmp = insertelement <3 x i64> %indices, i64 %second_indice, i64 0 %indices.new = insertelement <3 x i64> %indices.new.tmp, i64 %second_indice, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs, <3 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs, <3 x i64> %indices.new - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = 
extractelement <3 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } -define void @first_two_indices_and_first_two_baseptrs_are_knownequal.3elts(<3 x i64*> %baseptrs, i64* %second_baseptr, <3 x i64> %indices, i64 %second_indice) { +define void @first_two_indices_and_first_two_baseptrs_are_knownequal.3elts(<3 x ptr> %baseptrs, ptr %second_baseptr, <3 x i64> %indices, i64 %second_indice) { ; CHECK-LABEL: @first_two_indices_and_first_two_baseptrs_are_knownequal.3elts( -; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS_NEW_TMP]], i64* [[SECOND_BASEPTR]], i64 1 +; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS_NEW_TMP]], ptr [[SECOND_BASEPTR]], i64 1 ; CHECK-NEXT: [[INDICES_NEW_TMP:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES_NEW_TMP]], i64 [[SECOND_INDICE]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; - %baseptrs.new.tmp = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0 - %baseptrs.new = insertelement <3 x i64*> %baseptrs.new.tmp, i64* %second_baseptr, i64 1 + %baseptrs.new.tmp = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0 + %baseptrs.new = insertelement <3 x ptr> %baseptrs.new.tmp, ptr %second_baseptr, i64 1 %indices.new.tmp = insertelement <3 x i64> %indices, i64 %second_indice, i64 0 %indices.new = insertelement <3 x i64> %indices.new.tmp, i64 %second_indice, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices.new - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } 
;------------------------------------------------------------------------------- -define void @first_baseptr_is_known.4elts(<4 x i64*> %baseptrs, i64* %second_baseptr, <4 x i64> %indices) { +define void @first_baseptr_is_known.4elts(<4 x ptr> %baseptrs, ptr %second_baseptr, <4 x i64> %indices) { ; CHECK-LABEL: @first_baseptr_is_known.4elts( -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <4 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x i64*> [[BASEPTRS_NEW]], <4 x i64> [[INDICES:%.*]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) -; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 3 -; CHECK-NEXT: call void @use(i64* [[PTR_3]]) +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <4 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x ptr> [[BASEPTRS_NEW]], <4 x i64> [[INDICES:%.*]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) +; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 3 +; CHECK-NEXT: call void @use(ptr [[PTR_3]]) ; CHECK-NEXT: ret void ; - %baseptrs.new = insertelement <4 x i64*> %baseptrs, i64* %second_baseptr, i64 0 - %ptrs = getelementptr inbounds i64, <4 x i64*> %baseptrs.new, <4 x i64> %indices + %baseptrs.new = insertelement <4 x ptr> %baseptrs, ptr %second_baseptr, i64 0 + %ptrs = getelementptr inbounds i64, <4 x ptr> %baseptrs.new, <4 x i64> %indices - %ptr.0 = extractelement <4 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <4 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <4 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <4 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <4 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <4 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) - %ptr.3 = extractelement <4 x i64*> %ptrs, i64 3 - call void @use(i64* %ptr.3) + %ptr.3 = extractelement <4 x ptr> %ptrs, i64 3 + call void @use(ptr %ptr.3) ret void } -define void @first_indice_is_known.4elts(<4 x i64*> %baseptrs, <4 x i64> %indices, i64 %second_indice) { +define void @first_indice_is_known.4elts(<4 x ptr> %baseptrs, <4 x i64> %indices, i64 %second_indice) { ; CHECK-LABEL: @first_indice_is_known.4elts( ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <4 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x i64*> [[BASEPTRS:%.*]], <4 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void 
@use(i64* [[PTR_2]]) -; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 3 -; CHECK-NEXT: call void @use(i64* [[PTR_3]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x ptr> [[BASEPTRS:%.*]], <4 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) +; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 3 +; CHECK-NEXT: call void @use(ptr [[PTR_3]]) ; CHECK-NEXT: ret void ; %indices.new = insertelement <4 x i64> %indices, i64 %second_indice, i64 0 - %ptrs = getelementptr inbounds i64, <4 x i64*> %baseptrs, <4 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <4 x ptr> %baseptrs, <4 x i64> %indices.new - %ptr.0 = extractelement <4 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <4 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <4 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <4 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <4 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <4 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) - %ptr.3 = extractelement <4 x i64*> %ptrs, i64 3 - call void @use(i64* %ptr.3) + %ptr.3 = extractelement <4 x ptr> %ptrs, i64 3 + call void @use(ptr %ptr.3) ret void } -define void @first_indice_and_first_baseptr_are_known.4elts(<4 x i64*> %baseptrs, i64* %second_baseptr, <4 x i64> %indices, i64 %second_indice) { +define void @first_indice_and_first_baseptr_are_known.4elts(<4 x ptr> %baseptrs, ptr %second_baseptr, <4 x i64> %indices, i64 %second_indice) { ; CHECK-LABEL: @first_indice_and_first_baseptr_are_known.4elts( -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <4 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <4 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <4 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x i64*> [[BASEPTRS_NEW]], <4 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) -; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x i64*> [[PTRS]], i64 3 -; CHECK-NEXT: call void @use(i64* [[PTR_3]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <4 x ptr> [[BASEPTRS_NEW]], <4 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) +; CHECK-NEXT: [[PTR_3:%.*]] = extractelement <4 x ptr> [[PTRS]], i64 3 +; CHECK-NEXT: call void @use(ptr [[PTR_3]]) ; CHECK-NEXT: ret void ; - %baseptrs.new = insertelement <4 x 
i64*> %baseptrs, i64* %second_baseptr, i64 0 + %baseptrs.new = insertelement <4 x ptr> %baseptrs, ptr %second_baseptr, i64 0 %indices.new = insertelement <4 x i64> %indices, i64 %second_indice, i64 0 - %ptrs = getelementptr inbounds i64, <4 x i64*> %baseptrs.new, <4 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <4 x ptr> %baseptrs.new, <4 x i64> %indices.new - %ptr.0 = extractelement <4 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <4 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <4 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <4 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <4 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <4 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) - %ptr.3 = extractelement <4 x i64*> %ptrs, i64 3 - call void @use(i64* %ptr.3) + %ptr.3 = extractelement <4 x ptr> %ptrs, i64 3 + call void @use(ptr %ptr.3) ret void } ;------------------------------------------------------------------------------- -define void @first_two_baseptrs_is_knownequal.4elts(<3 x i64*> %baseptrs, i64* %second_baseptr, <3 x i64> %indices) { +define void @first_two_baseptrs_is_knownequal.4elts(<3 x ptr> %baseptrs, ptr %second_baseptr, <3 x i64> %indices) { ; CHECK-LABEL: @first_two_baseptrs_is_knownequal.4elts( -; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS_NEW_TMP]], i64* [[SECOND_BASEPTR]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS_NEW_TMP]], ptr [[SECOND_BASEPTR]], i64 1 +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES:%.*]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; - %baseptrs.new.tmp = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0 - %baseptrs.new = insertelement <3 x i64*> %baseptrs.new.tmp, i64* %second_baseptr, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices + %baseptrs.new.tmp = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0 + %baseptrs.new = insertelement <3 x ptr> %baseptrs.new.tmp, ptr %second_baseptr, i64 1 + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1 - call void 
@use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } -define void @first_two_indices_is_knownequal.4elts(<3 x i64*> %baseptrs, <3 x i64> %indices, i64 %second_indice) { +define void @first_two_indices_is_knownequal.4elts(<3 x ptr> %baseptrs, <3 x i64> %indices, i64 %second_indice) { ; CHECK-LABEL: @first_two_indices_is_knownequal.4elts( ; CHECK-NEXT: [[INDICES_NEW_TMP:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES_NEW_TMP]], i64 [[SECOND_INDICE]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS:%.*]], <3 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; %indices.new.tmp = insertelement <3 x i64> %indices, i64 %second_indice, i64 0 %indices.new = insertelement <3 x i64> %indices.new.tmp, i64 %second_indice, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs, <3 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs, <3 x i64> %indices.new - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } -define void @first_two_indices_and_first_two_baseptrs_are_knownequal.4elts(<3 x i64*> %baseptrs, i64* %second_baseptr, <3 x i64> %indices, i64 %second_indice) { +define void @first_two_indices_and_first_two_baseptrs_are_knownequal.4elts(<3 x ptr> %baseptrs, ptr %second_baseptr, <3 x i64> %indices, i64 %second_indice) { ; CHECK-LABEL: @first_two_indices_and_first_two_baseptrs_are_knownequal.4elts( -; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x i64*> [[BASEPTRS_NEW_TMP]], i64* [[SECOND_BASEPTR]], i64 1 +; CHECK-NEXT: [[BASEPTRS_NEW_TMP:%.*]] = insertelement <3 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <3 x ptr> [[BASEPTRS_NEW_TMP]], ptr [[SECOND_BASEPTR]], i64 1 ; CHECK-NEXT: [[INDICES_NEW_TMP:%.*]] = insertelement <3 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 ; CHECK-NEXT: 
[[INDICES_NEW:%.*]] = insertelement <3 x i64> [[INDICES_NEW_TMP]], i64 [[SECOND_INDICE]], i64 1 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x i64*> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x i64*> [[PTRS]], i64 2 -; CHECK-NEXT: call void @use(i64* [[PTR_2]]) +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <3 x ptr> [[BASEPTRS_NEW]], <3 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = extractelement <3 x ptr> [[PTRS]], i64 2 +; CHECK-NEXT: call void @use(ptr [[PTR_2]]) ; CHECK-NEXT: ret void ; - %baseptrs.new.tmp = insertelement <3 x i64*> %baseptrs, i64* %second_baseptr, i64 0 - %baseptrs.new = insertelement <3 x i64*> %baseptrs.new.tmp, i64* %second_baseptr, i64 1 + %baseptrs.new.tmp = insertelement <3 x ptr> %baseptrs, ptr %second_baseptr, i64 0 + %baseptrs.new = insertelement <3 x ptr> %baseptrs.new.tmp, ptr %second_baseptr, i64 1 %indices.new.tmp = insertelement <3 x i64> %indices, i64 %second_indice, i64 0 %indices.new = insertelement <3 x i64> %indices.new.tmp, i64 %second_indice, i64 1 - %ptrs = getelementptr inbounds i64, <3 x i64*> %baseptrs.new, <3 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <3 x ptr> %baseptrs.new, <3 x i64> %indices.new - %ptr.0 = extractelement <3 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <3 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <3 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <3 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.2 = extractelement <3 x i64*> %ptrs, i64 2 - call void @use(i64* %ptr.2) + %ptr.2 = extractelement <3 x ptr> %ptrs, i64 2 + call void @use(ptr %ptr.2) ret void } ;=============================================================================== -define <2 x i64*> @first_indice_and_baseptr_need_extraction.2elts.extrause(<2 x i64*> %baseptrs, i64* %second_baseptr, <2 x i64> %indices, i64 %second_indice) { +define <2 x ptr> @first_indice_and_baseptr_need_extraction.2elts.extrause(<2 x ptr> %baseptrs, ptr %second_baseptr, <2 x i64> %indices, i64 %second_indice) { ; CHECK-LABEL: @first_indice_and_baseptr_need_extraction.2elts.extrause( -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <2 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x i64*> [[BASEPTRS_NEW]], <2 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: ret <2 x i64*> [[PTRS]] +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x ptr> [[BASEPTRS_NEW]], <2 x i64> [[INDICES_NEW]] +; CHECK-NEXT: 
[[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: ret <2 x ptr> [[PTRS]] ; - %baseptrs.new = insertelement <2 x i64*> %baseptrs, i64* %second_baseptr, i64 0 + %baseptrs.new = insertelement <2 x ptr> %baseptrs, ptr %second_baseptr, i64 0 %indices.new = insertelement <2 x i64> %indices, i64 %second_indice, i64 0 - %ptrs = getelementptr inbounds i64, <2 x i64*> %baseptrs.new, <2 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <2 x ptr> %baseptrs.new, <2 x i64> %indices.new - %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - ret <2 x i64*> %ptrs + ret <2 x ptr> %ptrs } -define i64* @first_indice_and_baseptr_need_extraction.2elts.variable_extraction(<2 x i64*> %baseptrs, i64* %second_baseptr, <2 x i64> %indices, i64 %second_indice, i64 %variable_extract_idx) { +define ptr @first_indice_and_baseptr_need_extraction.2elts.variable_extraction(<2 x ptr> %baseptrs, ptr %second_baseptr, <2 x i64> %indices, i64 %second_indice, i64 %variable_extract_idx) { ; CHECK-LABEL: @first_indice_and_baseptr_need_extraction.2elts.variable_extraction( -; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x i64*> [[BASEPTRS:%.*]], i64* [[SECOND_BASEPTR:%.*]], i64 0 +; CHECK-NEXT: [[BASEPTRS_NEW:%.*]] = insertelement <2 x ptr> [[BASEPTRS:%.*]], ptr [[SECOND_BASEPTR:%.*]], i64 0 ; CHECK-NEXT: [[INDICES_NEW:%.*]] = insertelement <2 x i64> [[INDICES:%.*]], i64 [[SECOND_INDICE:%.*]], i64 0 -; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x i64*> [[BASEPTRS_NEW]], <2 x i64> [[INDICES_NEW]] -; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 0 -; CHECK-NEXT: call void @use(i64* [[PTR_0]]) -; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 1 -; CHECK-NEXT: call void @use(i64* [[PTR_1]]) -; CHECK-NEXT: [[PTR_VAR:%.*]] = extractelement <2 x i64*> [[PTRS]], i64 [[VARIABLE_EXTRACT_IDX:%.*]] -; CHECK-NEXT: ret i64* [[PTR_VAR]] +; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i64, <2 x ptr> [[BASEPTRS_NEW]], <2 x i64> [[INDICES_NEW]] +; CHECK-NEXT: [[PTR_0:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 0 +; CHECK-NEXT: call void @use(ptr [[PTR_0]]) +; CHECK-NEXT: [[PTR_1:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 1 +; CHECK-NEXT: call void @use(ptr [[PTR_1]]) +; CHECK-NEXT: [[PTR_VAR:%.*]] = extractelement <2 x ptr> [[PTRS]], i64 [[VARIABLE_EXTRACT_IDX:%.*]] +; CHECK-NEXT: ret ptr [[PTR_VAR]] ; - %baseptrs.new = insertelement <2 x i64*> %baseptrs, i64* %second_baseptr, i64 0 + %baseptrs.new = insertelement <2 x ptr> %baseptrs, ptr %second_baseptr, i64 0 %indices.new = insertelement <2 x i64> %indices, i64 %second_indice, i64 0 - %ptrs = getelementptr inbounds i64, <2 x i64*> %baseptrs.new, <2 x i64> %indices.new + %ptrs = getelementptr inbounds i64, <2 x ptr> %baseptrs.new, <2 x i64> %indices.new - %ptr.0 = extractelement <2 x i64*> %ptrs, i64 0 - call void @use(i64* %ptr.0) + %ptr.0 = extractelement <2 x ptr> %ptrs, i64 0 + call void @use(ptr %ptr.0) - %ptr.1 = extractelement <2 x i64*> %ptrs, i64 1 - call void @use(i64* %ptr.1) + %ptr.1 = extractelement <2 x ptr> %ptrs, i64 1 + call void @use(ptr %ptr.1) - %ptr.var = 
extractelement <2 x i64*> %ptrs, i64 %variable_extract_idx
+ %ptr.var = extractelement <2 x ptr> %ptrs, i64 %variable_extract_idx

- ret i64* %ptr.var
+ ret ptr %ptr.var
}
diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
index 63a990e..7f3dc7d 100644
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -2,601 +2,593 @@
; RUN: opt -S -passes=vector-combine -data-layout=e < %s | FileCheck %s
; RUN: opt -S -passes=vector-combine -data-layout=E < %s | FileCheck %s

-define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
+define void @insert_store(ptr %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 3
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 3
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
%vecins = insertelement <16 x i8> %0, i8 %s, i32 3
- store <16 x i8> %vecins, <16 x i8>* %q, align 16
+ store <16 x i8> %vecins, ptr %q, align 16
ret void
}

-define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
+define void @insert_store_i16_align1(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_i16_align1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
-; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 3
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
; CHECK-NEXT: ret void
;
entry:
- %0 = load <8 x i16>, <8 x i16>* %q
+ %0 = load <8 x i16>, ptr %q
%vecins = insertelement <8 x i16> %0, i16 %s, i32 3
- store <8 x i16> %vecins, <8 x i16>* %q, align 1
+ store <8 x i16> %vecins, ptr %q, align 1
ret void
}

; To verify case when index is out of bounds
-define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
+define void @insert_store_outofbounds(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_outofbounds(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
-; CHECK-NEXT: store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
+; CHECK-NEXT: store <8 x i16> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <8 x i16>, <8 x i16>* %q
+ %0 = load <8 x i16>, ptr %q
%vecins = insertelement <8 x i16> %0, i16 %s, i32 9
- store <8 x i16> %vecins, <8 x i16>* %q
+ store <8 x i16> %vecins, ptr %q
ret void
}

-define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
+define void @insert_store_vscale(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_vscale(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
-; CHECK-NEXT: store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
+; CHECK-NEXT: store <vscale x 8 x i16> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
+ %0 = load <vscale x 8 x i16>, ptr %q
%vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
- store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
+ store <vscale x 8 x i16> %vecins, ptr %q
ret void
}

-define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
+define void @insert_store_v9i4(ptr %q, i4 zeroext %s) {
; CHECK-LABEL: @insert_store_v9i4(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <9 x i4>, <9 x i4>* [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load <9 x i4>, ptr [[Q:%.*]], align 8
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
-; CHECK-NEXT: store <9 x i4> [[VECINS]], <9 x i4>* [[Q]], align 1
+; CHECK-NEXT: store <9 x i4> [[VECINS]], ptr [[Q]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <9 x i4>, <9 x i4>* %q
+ %0 = load <9 x i4>, ptr %q
%vecins = insertelement <9 x i4> %0, i4 %s, i32 3
- store <9 x i4> %vecins, <9 x i4>* %q, align 1
+ store <9 x i4> %vecins, ptr %q, align 1
ret void
}

-define void @insert_store_v4i27(<4 x i27>* %q, i27 zeroext %s) {
+define void @insert_store_v4i27(ptr %q, i27 zeroext %s) {
; CHECK-LABEL: @insert_store_v4i27(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i27>, <4 x i27>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i27>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
-; CHECK-NEXT: store <4 x i27> [[VECINS]], <4 x i27>* [[Q]], align 1
+; CHECK-NEXT: store <4 x i27> [[VECINS]], ptr [[Q]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <4 x i27>, <4 x i27>* %q
+ %0 = load <4 x i27>, ptr %q
%vecins = insertelement <4 x i27> %0, i27 %s, i32 3
- store <4 x i27> %vecins, <4 x i27>* %q, align 1
+ store <4 x i27> %vecins, ptr %q, align 1
ret void
}

-define void @insert_store_blk_differ(<8 x i16>* %q, i16 zeroext %s) {
+define void @insert_store_blk_differ(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_blk_differ(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: br label [[CONT:%.*]]
; CHECK: cont:
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
-; CHECK-NEXT: store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
+; CHECK-NEXT: store <8 x i16> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <8 x i16>, <8 x i16>* %q
+ %0 = load <8 x i16>, ptr %q
br label %cont
cont:
%vecins = insertelement <8 x i16> %0, i16 %s, i32 3
- store <8 x i16> %vecins, <8 x i16>* %q
+ store <8 x i16> %vecins, ptr %q
ret void
}

-define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
%vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
ret void
}

; To verify align here is narrowed to scalar store size
-define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_large_alignment(ptr %q, i32 zeroext %s, i32 %idx) {
;
CHECK-LABEL: @insert_store_nonconst_large_alignment( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]] -; CHECK-NEXT: store i32 [[S:%.*]], i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q:%.*]], i32 0, i32 [[IDX]] +; CHECK-NEXT: store i32 [[S:%.*]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; entry: %cmp = icmp ult i32 %idx, 4 call void @llvm.assume(i1 %cmp) - %i = load <4 x i32>, <4 x i32>* %q, align 128 + %i = load <4 x i32>, ptr %q, align 128 %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx - store <4 x i32> %vecins, <4 x i32>* %q, align 128 + store <4 x i32> %vecins, ptr %q, align 128 ret void } -define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %idx) { +define void @insert_store_nonconst_align_maximum_8(ptr %q, i64 %s, i32 %idx) { ; CHECK-LABEL: @insert_store_nonconst_align_maximum_8( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]] -; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX]] +; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 8 ; CHECK-NEXT: ret void ; %cmp = icmp ult i32 %idx, 2 call void @llvm.assume(i1 %cmp) - %i = load <8 x i64>, <8 x i64>* %q, align 8 + %i = load <8 x i64>, ptr %q, align 8 %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx - store <8 x i64> %vecins, <8 x i64>* %q, align 8 + store <8 x i64> %vecins, ptr %q, align 8 ret void } -define void @insert_store_nonconst_align_maximum_4(<8 x i64>* %q, i64 %s, i32 %idx) { +define void @insert_store_nonconst_align_maximum_4(ptr %q, i64 %s, i32 %idx) { ; CHECK-LABEL: @insert_store_nonconst_align_maximum_4( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]] -; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX]] +; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; %cmp = icmp ult i32 %idx, 2 call void @llvm.assume(i1 %cmp) - %i = load <8 x i64>, <8 x i64>* %q, align 4 + %i = load <8 x i64>, ptr %q, align 4 %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx - store <8 x i64> %vecins, <8 x i64>* %q, align 4 + store <8 x i64> %vecins, ptr %q, align 4 ret void } -define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx) { +define void @insert_store_nonconst_align_larger(ptr %q, i64 %s, i32 %idx) { ; CHECK-LABEL: @insert_store_nonconst_align_larger( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]] -; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX]] +; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; %cmp = icmp ult i32 %idx, 2 call void @llvm.assume(i1 %cmp) - %i = load <8 x i64>, <8 x i64>* 
%q, align 4
+ %i = load <8 x i64>, ptr %q, align 4
 %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
- store <8 x i64> %vecins, <8 x i64>* %q, align 2
+ store <8 x i64> %vecins, ptr %q, align 2
 ret void
}

-define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_known_valid_by_assume(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
 %cmp = icmp ult i32 %idx, 4
 call void @llvm.assume(i1 %cmp)
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

declare void @maythrow() readnone

-define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: call void @maythrow()
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
 %cmp = icmp ult i32 %idx, 4
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 call void @maythrow()
 call void @llvm.assume(i1 %cmp)
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_not_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_not_known_valid_by_assume(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
 %cmp = icmp ult i32 %idx, 17
 call void @llvm.assume(i1 %cmp)
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

declare void @llvm.assume(i1)

-define void @insert_store_nonconst_index_known_noundef_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
+define void @insert_store_nonconst_index_known_noundef_and_valid_by_and(ptr %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_and(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = and i32 %idx, 7
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_base_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_base_frozen_and_valid_by_and(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_and(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX_FROZEN]], 7
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.frozen = freeze i32 %idx
 %idx.clamped = and i32 %idx.frozen, 7
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_frozen_and_valid_by_and(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_and(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT: [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = and i32 %idx, 7
 %idx.clamped.frozen = freeze i32 %idx.clamped
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[TMP0]], 7
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP1]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP1]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = and i32 %idx, 7
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_not_known_valid_by_and(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = and i32 %idx, 16
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
+define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(ptr %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = and i32 %idx, 16
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_known_noundef_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
+define void @insert_store_nonconst_index_known_noundef_and_valid_by_urem(ptr %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_urem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = urem i32 %idx, 16
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_base_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_base_frozen_and_valid_by_urem(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_urem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX_FROZEN]], 16
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.frozen = freeze i32 %idx
 %idx.clamped = urem i32 %idx.frozen, 16
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_frozen_and_valid_by_urem(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_urem(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT: [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = urem i32 %idx, 16
 %idx.clamped.frozen = freeze i32 %idx.clamped
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[TMP0]], 16
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP1]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP1]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = urem i32 %idx, 16
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_nonconst_index_not_known_valid_by_urem(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = urem i32 %idx, 17
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
+define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(ptr %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %idx.clamped = urem i32 %idx, 17
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
- store <16 x i8> %vecins, <16 x i8>* %q
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
+define void @insert_store_ptr_strip(ptr %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[ADDR0]], i64 0
-; CHECK-NEXT: [[ADDR2:%.*]] = bitcast <2 x i64>* [[ADDR1]] to <16 x i8>*
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[ADDR2]], i32 0, i32 3
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 3
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
- %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
- %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
- %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
- store <16 x i8> %vecins, <16 x i8>* %addr2
+ store <16 x i8> %vecins, ptr %q
 ret void
}

-define void @volatile_update(<16 x i8>* %q, <16 x i8>* %p, i8 zeroext %s) {
+define void @volatile_update(ptr %q, ptr %p, i8 zeroext %s) {
; CHECK-LABEL: @volatile_update(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
-; CHECK-NEXT: store volatile <16 x i8> [[VECINS0]], <16 x i8>* [[Q]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, <16 x i8>* [[P:%.*]], align 16
+; CHECK-NEXT: store volatile <16 x i8> [[VECINS0]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
-; CHECK-NEXT: store <16 x i8> [[VECINS1]], <16 x i8>* [[P]], align 16
+; CHECK-NEXT: store <16 x i8> [[VECINS1]], ptr [[P]], align 16
; CHECK-NEXT: ret void
;
entry:
- %0 = load <16 x i8>, <16 x i8>* %q
+ %0 = load <16 x i8>, ptr %q
 %vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
- store volatile <16 x i8> %vecins0, <16 x i8>* %q
+ store volatile <16 x i8> %vecins0, ptr %q

- %1 = load volatile <16 x i8>, <16 x i8>* %p
+ %1 = load volatile <16 x i8>, ptr %p
 %vecins1 = insertelement <16 x i8> %1, i8 %s, i32 1
- store <16 x i8> %vecins1, <16 x i8>* %p
+ store <16 x i8> %vecins1, ptr %p
 ret void
}

-define void @insert_store_addr_differ(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
+define void @insert_store_addr_differ(ptr %p, ptr %q, i8 %s) {
; CHECK-LABEL: @insert_store_addr_differ(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
+; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
-; CHECK-NEXT: store <16 x i8> [[INS]], <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: store <16 x i8> [[INS]], ptr [[Q:%.*]], align 16
; CHECK-NEXT: ret void
;
entry:
- %ld = load <16 x i8>, <16 x i8>* %p
+ %ld = load <16 x i8>, ptr %p
 %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
- store <16 x i8> %ins, <16 x i8>* %q
+ store <16 x i8> %ins, ptr %q
 ret void
}

; We can't transform if any instr could modify memory in between.
-define void @insert_store_mem_modify(<16 x i8>* %p, <16 x i8>* %q, <16 x i8>* noalias %r, i8 %s, i32 %m) {
+define void @insert_store_mem_modify(ptr %p, ptr %q, ptr noalias %r, i8 %s, i32 %m) {
; CHECK-LABEL: @insert_store_mem_modify(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
-; CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[Q:%.*]], align 16
; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
-; CHECK-NEXT: store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
-; CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[R:%.*]], align 16
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 7
-; CHECK-NEXT: store i8 [[S]], i8* [[TMP0]], align 1
-; CHECK-NEXT: [[PTR0:%.*]] = bitcast <16 x i8>* [[P]] to <4 x i32>*
-; CHECK-NEXT: [[LD3:%.*]] = load <4 x i32>, <4 x i32>* [[PTR0]], align 16
-; CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[P]], align 16
+; CHECK-NEXT: store <16 x i8> [[INS]], ptr [[P]], align 16
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[R:%.*]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q]], i32 0, i32 7
+; CHECK-NEXT: store i8 [[S]], ptr [[TMP0]], align 1
+; CHECK-NEXT: [[LD3:%.*]] = load <4 x i32>, ptr [[P]], align 16
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[P]], align 16
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
-; CHECK-NEXT: store <4 x i32> [[INS3]], <4 x i32>* [[PTR0]], align 16
+; CHECK-NEXT: store <4 x i32> [[INS3]], ptr [[P]], align 16
; CHECK-NEXT: ret void
;
entry:
; p may alias q
- %ld = load <16 x i8>, <16 x i8>* %p
- store <16 x i8> zeroinitializer, <16 x i8>* %q
+ %ld = load <16 x i8>, ptr %p
+ store <16 x i8> zeroinitializer, ptr %q
 %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
- store <16 x i8> %ins, <16 x i8>* %p
+ store <16 x i8> %ins, ptr %p

; p never aliases r
- %ld2 = load <16 x i8>, <16 x i8>* %q
- store <16 x i8> zeroinitializer, <16 x i8>* %r
+ %ld2 = load <16 x i8>, ptr %q
+ store <16 x i8> zeroinitializer, ptr %r
 %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
- store <16 x i8> %ins2, <16 x i8>* %q
+ store <16 x i8> %ins2, ptr %q

; p must alias ptr0
- %ptr0 = bitcast <16 x i8>* %p to <4 x i32>*
- %ld3 = load <4 x i32>, <4 x i32>* %ptr0
- store <16 x i8> zeroinitializer, <16 x i8>* %p
+ %ld3 = load <4 x i32>, ptr %p
+ store <16 x i8> zeroinitializer, ptr %p
 %ins3 = insertelement <4 x i32> %ld3, i32 %m, i32 0
- store <4 x i32> %ins3, <4 x i32>* %ptr0
+ store <4 x i32> %ins3, ptr %p
 ret void
}

; Check cases when calls may modify memory
-define void @insert_store_with_call(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
+define void @insert_store_with_call(ptr %p, ptr %q, i8 %s) {
; CHECK-LABEL: @insert_store_with_call(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
-; CHECK-NEXT: call void @maywrite(<16 x i8>* [[P]])
+; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 16
+; CHECK-NEXT: call void @maywrite(ptr [[P]])
; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
-; CHECK-NEXT: store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
+; CHECK-NEXT: store <16 x i8> [[INS]], ptr [[P]], align 16
; CHECK-NEXT: call void @foo()
-; CHECK-NEXT: call void @nowrite(<16 x i8>* [[P]])
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P]], i32 0, i32 7
-; CHECK-NEXT: store i8 [[S]], i8* [[TMP0]], align 1
+; CHECK-NEXT: call void @nowrite(ptr [[P]])
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P]], i32 0, i32 7
+; CHECK-NEXT: store i8 [[S]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
- %ld = load <16 x i8>, <16 x i8>* %p
- call void @maywrite(<16 x i8>* %p)
+ %ld = load <16 x i8>, ptr %p
+ call void @maywrite(ptr %p)
 %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
- store <16 x i8> %ins, <16 x i8>* %p
+ store <16 x i8> %ins, ptr %p
 call void @foo() ; Barrier
- %ld2 = load <16 x i8>, <16 x i8>* %p
- call void @nowrite(<16 x i8>* %p)
+ %ld2 = load <16 x i8>, ptr %p
+ call void @nowrite(ptr %p)
 %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
- store <16 x i8> %ins2, <16 x i8>* %p
+ store <16 x i8> %ins2, ptr %p
 ret void
}

declare void @foo()
-declare void @maywrite(<16 x i8>*)
-declare void @nowrite(<16 x i8>*) readonly
+declare void @maywrite(ptr)
+declare void @nowrite(ptr) readonly

; Test that the combine quits when the number of instructions in between
; exceeds the limit (default 30).
-define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %arg2, i8 zeroext %arg3) {
+define i32 @insert_store_maximum_scan_instrs(i32 %arg, ptr %arg1, ptr %arg2, i8 zeroext %arg3) {
; CHECK-LABEL: @insert_store_maximum_scan_instrs(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = or i32 [[ARG:%.*]], 1
-; CHECK-NEXT: [[I4:%.*]] = load <16 x i8>, <16 x i8>* [[ARG2:%.*]], align 16
+; CHECK-NEXT: [[I4:%.*]] = load <16 x i8>, ptr [[ARG2:%.*]], align 16
; CHECK-NEXT: [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
; CHECK-NEXT: [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
; CHECK-NEXT: [[I7:%.*]] = lshr i32 [[I6]], 26
@@ -605,13 +597,13 @@ define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %a
; CHECK-NEXT: [[I10:%.*]] = lshr i32 [[I6]], 11
; CHECK-NEXT: [[I11:%.*]] = and i32 [[I10]], 32767
; CHECK-NEXT: [[I12:%.*]] = zext i8 [[I9]] to i64
-; CHECK-NEXT: [[I13:%.*]] = getelementptr inbounds i16, i16* [[ARG1:%.*]], i64 [[I12]]
-; CHECK-NEXT: [[I14:%.*]] = load i16, i16* [[I13]], align 2
+; CHECK-NEXT: [[I13:%.*]] = getelementptr inbounds i16, ptr [[ARG1:%.*]], i64 [[I12]]
+; CHECK-NEXT: [[I14:%.*]] = load i16, ptr [[I13]], align 2
; CHECK-NEXT: [[I15:%.*]] = zext i16 [[I14]] to i32
; CHECK-NEXT: [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
; CHECK-NEXT: [[I17:%.*]] = zext i8 [[I16]] to i64
-; CHECK-NEXT: [[I18:%.*]] = getelementptr inbounds i16, i16* [[ARG1]], i64 [[I17]]
-; CHECK-NEXT: [[I19:%.*]] = load i16, i16* [[I18]], align 2
+; CHECK-NEXT: [[I18:%.*]] = getelementptr inbounds i16, ptr [[ARG1]], i64 [[I17]]
+; CHECK-NEXT: [[I19:%.*]] = load i16, ptr [[I18]], align 2
; CHECK-NEXT: [[I20:%.*]] = zext i16 [[I19]] to i32
; CHECK-NEXT: [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
; CHECK-NEXT: [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
@@ -629,12 +621,12 @@ define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %a
; CHECK-NEXT: [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
; CHECK-NEXT: [[I35:%.*]] = lshr i32 [[I34]], 1
; CHECK-NEXT: [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
-; CHECK-NEXT: store <16 x i8> [[I36]], <16 x i8>* [[ARG2]], align 16
+; CHECK-NEXT: store <16 x i8> [[I36]], ptr [[ARG2]], align 16
; CHECK-NEXT: ret i32 [[I35]]
;
bb:
 %i = or i32 %arg, 1
- %i4 = load <16 x i8>, <16 x i8>* %arg2, align 16
+ %i4 = load <16 x i8>, ptr %arg2, align 16
 %i5 = tail call i32 @bar(i32 %i, i1 true)
 %i6 = shl i32 %arg, %i5
 %i7 = lshr i32 %i6, 26
@@ -643,13 +635,13 @@ bb:
 %i10 = lshr i32 %i6, 11
 %i11 = and i32 %i10, 32767
 %i12 = zext i8 %i9 to i64
- %i13 = getelementptr inbounds i16, i16* %arg1, i64 %i12
- %i14 = load i16, i16* %i13, align 2
+ %i13 = getelementptr inbounds i16, ptr %arg1, i64 %i12
+ %i14 = load i16, ptr %i13, align 2
 %i15 = zext i16 %i14 to i32
 %i16 = add nuw nsw i8 %i9, 1
 %i17 = zext i8 %i16 to i64
- %i18 = getelementptr inbounds i16, i16* %arg1, i64 %i17
- %i19 = load i16, i16* %i18, align 2
+ %i18 = getelementptr inbounds i16, ptr %arg1, i64 %i17
+ %i19 = load i16, ptr %i18, align 2
 %i20 = zext i16 %i19 to i32
 %i21 = sub nsw i32 %i20, %i15
 %i22 = mul nsw i32 %i11, %i21
@@ -667,7 +659,7 @@ bb:
 %i34 = select i1 %i33, i32 %arg, i32 %i31
 %i35 = lshr i32 %i34, 1
 %i36 = insertelement <16 x i8> %i4, i8 %arg3, i32 3
- store <16 x i8> %i36, <16 x i8>* %arg2, align 16
+ store <16 x i8> %i36, ptr %arg2, align 16
 ret i32 %i35
}
-- 
2.7.4