From 701890164d567866900f3087ffd2ad4da963111c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 21 Dec 2022 12:36:15 +0100 Subject: [PATCH] [ARM] Convert some tests to opaque pointers (NFC) --- llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll | 554 ++++++++++---------- llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll | 324 ++++++------ llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll | 575 ++++++++++----------- .../CodeGen/ARM/ParallelDSP/inner-full-unroll.ll | 146 +++--- llvm/test/CodeGen/ARM/ParallelDSP/overlapping.ll | 280 +++++----- llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll | 278 +++++----- llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll | 158 +++--- llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll | 143 +++-- llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll | 143 +++-- llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll | 143 +++-- 10 files changed, 1305 insertions(+), 1439 deletions(-) diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll b/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll index 77a3e88..b75f7af 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll @@ -5,14 +5,14 @@ ; instruction possibly aliasing any mul load operands; arguments are passed ; without 'restrict' enabled. ; -define dso_local i32 @no_restrict(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +define dso_local i32 @no_restrict(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LABEL: @no_restrict( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ] @@ -20,19 +20,19 @@ define dso_local i32 @no_restrict(i32 %arg, i32* nocapture %arg1, i16* nocapture ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] 
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]] @@ -46,8 +46,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -57,22 +57,22 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 ; Store inserted here, aliasing with arrayidx, arrayidx1, arrayidx3 - store i16 42, i16* %arrayidx, align 2 + store i16 42, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -86,14 +86,14 @@ for.body: ; aliasing one of the mul load operands. Arguments are now annotated with ; 'noalias'. 
; -define dso_local i32 @restrict(i32 %arg, i32* noalias %arg1, i16* noalias readonly %arg2, i16* noalias %arg3) { +define dso_local i32 @restrict(i32 %arg, ptr noalias %arg1, ptr noalias readonly %arg2, ptr noalias %arg3) { ; CHECK-LABEL: @restrict( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ] @@ -101,19 +101,19 @@ define dso_local i32 @restrict(i32 %arg, i32* noalias %arg1, i16* noalias readon ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]] @@ -127,8 +127,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -138,22 +138,22 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = 
load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 ; Store inserted here, aliasing only with loads from 'arrayidx'. - store i16 42, i16* %arrayidx, align 2 + store i16 42, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -166,14 +166,14 @@ for.body: br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @store_dominates_all(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +define dso_local i32 @store_dominates_all(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LABEL: @store_dominates_all( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] @@ -181,23 +181,21 @@ define dso_local i32 @store_dominates_all(i32 %arg, i32* nocapture %arg1, i16* n ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[ARRAYIDX]] to i32* -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 ; CHECK-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: 
[[TMP8:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[ARRAYIDX3]] to i32* -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16 ; CHECK-NEXT: [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP12]] to i32 @@ -207,8 +205,8 @@ define dso_local i32 @store_dominates_all(i32 %arg, i32* nocapture %arg1, i16* n ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP18]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]] @@ -222,8 +220,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -233,19 +231,19 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - store i16 42, i16* %arrayidx, align 2 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + store i16 42, ptr %arrayidx, align 2 + %0 = load i16, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -255,14 +253,14 @@ for.body: br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @loads_dominate(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { 
+define dso_local i32 @loads_dominate(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LABEL: @loads_dominate( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] @@ -270,22 +268,20 @@ define dso_local i32 @loads_dominate(i32 %arg, i32* nocapture %arg1, i16* nocapt ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[ARRAYIDX]] to i32* -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 ; CHECK-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[ARRAYIDX3]] to i32* -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16 ; CHECK-NEXT: [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP12]] to i32 @@ -295,14 +291,14 @@ define dso_local i32 @loads_dominate(i32 %arg, i32* nocapture %arg1, i16* nocapt ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP18:%.*]] = 
load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP18]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]] ; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]] ; CHECK-NEXT: [[ADD11:%.*]] = add i32 [[MUL9]], [[ADD10]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]] ; @@ -311,8 +307,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -322,36 +318,36 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 %add10 = add i32 %mul, %mac1.026 %add11 = add i32 %mul9, %add10 - store i16 42, i16* %arrayidx, align 2 + store i16 42, ptr %arrayidx, align 2 %exitcond = icmp ne i32 %add, %arg br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @store_alias_arg3_legal_1(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* nocapture readonly %arg3) { +define dso_local i32 @store_alias_arg3_legal_1(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LABEL: @store_alias_arg3_legal_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] @@ -359,22 +355,20 @@ define dso_local i32 @store_alias_arg3_legal_1(i32 %arg, i32* nocapture %arg1, i ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] 
], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[ARRAYIDX]] to i32* -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 ; CHECK-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[ARRAYIDX3]] to i32* -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16 ; CHECK-NEXT: [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP12]] to i32 @@ -384,9 +378,9 @@ define dso_local i32 @store_alias_arg3_legal_1(i32 %arg, i32* nocapture %arg1, i ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP18]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]] @@ -400,8 +394,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -411,19 +405,19 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, 
ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - store i16 42, i16* %arrayidx, align 2 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + store i16 42, ptr %arrayidx, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -433,14 +427,14 @@ for.body: br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @store_alias_arg3_legal_2(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* nocapture readonly %arg3) { +define dso_local i32 @store_alias_arg3_legal_2(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LABEL: @store_alias_arg3_legal_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] @@ -448,23 +442,21 @@ define dso_local i32 @store_alias_arg3_legal_2(i32 %arg, i32* nocapture %arg1, i ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[ARRAYIDX]] to i32* -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 ; CHECK-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[ARRAYIDX3]] to i32* -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16 ; CHECK-NEXT: [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP12]] to i32 @@ -474,8 +466,8 @@ define dso_local i32 @store_alias_arg3_legal_2(i32 %arg, i32* nocapture %arg1, i ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP18]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]] @@ -489,8 +481,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -500,19 +492,19 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - store i16 42, i16* %arrayidx, align 2 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + store i16 42, ptr %arrayidx, align 2 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -522,14 +514,14 @@ for.body: br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @store_alias_arg3_illegal_1(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* noalias nocapture %arg3) { +define dso_local i32 @store_alias_arg3_illegal_1(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr noalias nocapture 
%arg3) { ; CHECK-LABEL: @store_alias_arg3_illegal_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ] @@ -537,19 +529,19 @@ define dso_local i32 @store_alias_arg3_illegal_1(i32 %arg, i32* nocapture %arg1, ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]] @@ -563,8 +555,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -574,19 +566,19 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = 
getelementptr inbounds i16, i16* %arg3, i32 %add - store i16 42, i16* %arrayidx1, align 2 - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + store i16 42, ptr %arrayidx1, align 2 + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -596,14 +588,14 @@ for.body: br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @store_alias_arg3_illegal_2(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* noalias nocapture %arg3) { +define dso_local i32 @store_alias_arg3_illegal_2(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr noalias nocapture %arg3) { ; CHECK-LABEL: @store_alias_arg3_illegal_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ] @@ -611,19 +603,19 @@ define dso_local i32 @store_alias_arg3_illegal_2(i32 %arg, i32* nocapture %arg1, ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 
[[CONV]], [[CONV4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]] @@ -637,8 +629,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -648,19 +640,19 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - store i16 42, i16* %arrayidx, align 2 - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + store i16 42, ptr %arrayidx, align 2 + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -670,14 +662,14 @@ for.body: br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @store_alias_arg2_illegal_1(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +define dso_local i32 @store_alias_arg2_illegal_1(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LABEL: @store_alias_arg2_illegal_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ] @@ -685,19 +677,19 @@ define dso_local i32 @store_alias_arg2_illegal_1(i32 %arg, i32* nocapture %arg1, ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX6]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]] @@ -711,8 +703,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -722,19 +714,19 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - store i16 42, i16* %arrayidx6, align 2 - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + store i16 42, ptr %arrayidx6, align 2 + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -744,14 +736,14 @@ for.body: br i1 %exitcond, label %for.body, label %for.cond.cleanup } -define dso_local i32 @store_alias_arg2_illegal_2(i32 %arg, i32* nocapture 
%arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +define dso_local i32 @store_alias_arg2_illegal_2(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LABEL: @store_alias_arg2_illegal_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARG3:%.*]], align 2 -; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, i16* [[ARG2:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2 +; CHECK-NEXT: [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ] @@ -759,19 +751,19 @@ define dso_local i32 @store_alias_arg2_illegal_2(i32 %arg, i32* nocapture %arg1, ; CHECK: for.body: ; CHECK-NEXT: [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[I_025]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_025]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[ARG3]], i32 [[ADD]] -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[I_025]] -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]] +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]] +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]] -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARG2]], i32 [[ADD]] -; CHECK-NEXT: store i16 42, i16* [[ARRAYIDX3]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]] +; CHECK-NEXT: store i16 42, ptr [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 ; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[CONV8:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]] @@ -785,8 +777,8 @@ entry: br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 + %.pre = load i16, ptr %arg3, align 2 + %.pre27 = load i16, ptr %arg2, align 2 br label %for.body for.cond.cleanup: @@ -796,19 +788,19 @@ for.cond.cleanup: for.body: %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 
- %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025 + %0 = load i16, ptr %arrayidx, align 2 %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add + %1 = load i16, ptr %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025 + %2 = load i16, ptr %arrayidx3, align 2 %conv = sext i16 %2 to i32 %conv4 = sext i16 %0 to i32 %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - store i16 42, i16* %arrayidx3, align 2 - %3 = load i16, i16* %arrayidx6, align 2 + %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add + store i16 42, ptr %arrayidx3, align 2 + %3 = load i16, ptr %arrayidx6, align 2 %conv7 = sext i16 %3 to i32 %conv8 = sext i16 %1 to i32 %mul9 = mul nsw i32 %conv7, %conv8 @@ -820,7 +812,7 @@ for.body: ; TODO: I think we should be able to generate one smlad here. The search fails ; when it finds the alias. -define i32 @one_pair_alias(i16* noalias nocapture readonly %b, i16* noalias nocapture %c) { +define i32 @one_pair_alias(ptr noalias nocapture readonly %b, ptr noalias nocapture %c) { ; CHECK-LABEL: @one_pair_alias( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -832,23 +824,23 @@ define i32 @one_pair_alias(i16* noalias nocapture readonly %b, i16* noalias noca ; CHECK-NEXT: [[ADD3:%.*]] = or i32 [[I_050]], 1 ; CHECK-NEXT: [[ADD11:%.*]] = or i32 [[I_050]], 2 ; CHECK-NEXT: [[ADD19:%.*]] = or i32 [[I_050]], 3 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[I_050]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[ADD3]] -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[ADD11]] -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[ADD19]] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[C:%.*]], i32 [[I_050]] -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[C]], i32 [[ADD3]] -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, i16* [[C]], i32 [[ADD11]] -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[C]], i32 [[ADD19]] -; CHECK-NEXT: [[TMP:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX12]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX20]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 -; CHECK-NEXT: store i16 43, i16* [[ARRAYIDX7]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 -; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX15]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX23]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[I_050]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[ADD3]] +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[ADD11]] +; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[ADD19]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[C:%.*]], i32 [[I_050]] +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = 
getelementptr inbounds i16, ptr [[C]], i32 [[ADD3]] +; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, ptr [[C]], i32 [[ADD11]] +; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[C]], i32 [[ADD19]] +; CHECK-NEXT: [[TMP:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX12]], align 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX20]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: store i16 43, ptr [[ARRAYIDX7]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX23]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP]] to i32 ; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] @@ -881,23 +873,23 @@ for.body: ; preds = %for.body, %entry %add3 = or i32 %i.050, 1 %add11 = or i32 %i.050, 2 %add19 = or i32 %i.050, 3 - %arrayidx = getelementptr inbounds i16, i16* %b, i32 %i.050 - %arrayidx4 = getelementptr inbounds i16, i16* %b, i32 %add3 - %arrayidx12 = getelementptr inbounds i16, i16* %b, i32 %add11 - %arrayidx20 = getelementptr inbounds i16, i16* %b, i32 %add19 - %arrayidx1 = getelementptr inbounds i16, i16* %c, i32 %i.050 - %arrayidx7 = getelementptr inbounds i16, i16* %c, i32 %add3 - %arrayidx15 = getelementptr inbounds i16, i16* %c, i32 %add11 - %arrayidx23 = getelementptr inbounds i16, i16* %c, i32 %add19 - %tmp = load i16, i16* %arrayidx, align 2 - %tmp2 = load i16, i16* %arrayidx4, align 2 - %tmp4 = load i16, i16* %arrayidx12, align 2 - %tmp6 = load i16, i16* %arrayidx20, align 2 - %tmp1 = load i16, i16* %arrayidx1, align 2 - store i16 43, i16 *%arrayidx7 - %tmp3 = load i16, i16* %arrayidx7, align 2 - %tmp5 = load i16, i16* %arrayidx15, align 2 - %tmp7 = load i16, i16* %arrayidx23, align 2 + %arrayidx = getelementptr inbounds i16, ptr %b, i32 %i.050 + %arrayidx4 = getelementptr inbounds i16, ptr %b, i32 %add3 + %arrayidx12 = getelementptr inbounds i16, ptr %b, i32 %add11 + %arrayidx20 = getelementptr inbounds i16, ptr %b, i32 %add19 + %arrayidx1 = getelementptr inbounds i16, ptr %c, i32 %i.050 + %arrayidx7 = getelementptr inbounds i16, ptr %c, i32 %add3 + %arrayidx15 = getelementptr inbounds i16, ptr %c, i32 %add11 + %arrayidx23 = getelementptr inbounds i16, ptr %c, i32 %add19 + %tmp = load i16, ptr %arrayidx, align 2 + %tmp2 = load i16, ptr %arrayidx4, align 2 + %tmp4 = load i16, ptr %arrayidx12, align 2 + %tmp6 = load i16, ptr %arrayidx20, align 2 + %tmp1 = load i16, ptr %arrayidx1, align 2 + store i16 43, ptr %arrayidx7 + %tmp3 = load i16, ptr %arrayidx7, align 2 + %tmp5 = load i16, ptr %arrayidx15, align 2 + %tmp7 = load i16, ptr %arrayidx23, align 2 %conv = sext i16 %tmp to i32 %conv2 = sext i16 %tmp1 to i32 %mul = mul nsw i32 %conv2, %conv diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll b/llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll index 1e9cdde..5ca5fd6 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll @@ -1,27 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s -define i32 @single_block(i16* %a, i16* %b, i32 %acc) { +define i32 @single_block(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @single_block( ; 
CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP3]], i32 [[ACC:%.*]]) ; CHECK-NEXT: ret i32 [[TMP4]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %add = add i32 %mul.0, %mul.1 @@ -29,27 +27,25 @@ entry: ret i32 %res } -define i64 @single_block_64(i16* %a, i16* %b, i64 %acc) { +define i64 @single_block_64(ptr %a, ptr %b, i64 %acc) { ; CHECK-LABEL: @single_block_64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[ACC:%.*]]) ; CHECK-NEXT: ret i64 [[TMP4]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %sext.mul.0 = sext i32 %mul.0 to i64 @@ -59,13 +55,11 @@ entry: ret i64 %res } -define i32 @multi_block(i16* %a, i16* %b, i32 %acc) { +define i32 @multi_block(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @multi_block( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP3]], i32 0) ; CHECK-NEXT: br label [[BB_1:%.*]] ; CHECK: bb.1: @@ -73,16 +67,16 @@ define i32 @multi_block(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[RES]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, 
ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %add = add i32 %mul.0, %mul.1 @@ -93,13 +87,11 @@ bb.1: ret i32 %res } -define i64 @multi_block_64(i16* %a, i16* %b, i64 %acc) { +define i64 @multi_block_64(ptr %a, ptr %b, i64 %acc) { ; CHECK-LABEL: @multi_block_64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 0) ; CHECK-NEXT: br label [[BB_1:%.*]] ; CHECK: bb.1: @@ -107,16 +99,16 @@ define i64 @multi_block_64(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: ret i64 [[RES]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %sext.mul.0 = sext i32 %mul.0 to i64 @@ -129,21 +121,21 @@ bb.1: ret i64 %res } -define i32 @multi_block_1(i16* %a, i16* %b, i32 %acc) { +define i32 @multi_block_1(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @multi_block_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A:%.*]], align 2 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A:%.*]], align 2 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B:%.*]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[SEXT_A_0]], [[SEXT_B_0]] ; CHECK-NEXT: br label [[BB_1:%.*]] ; CHECK: bb.1: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B]], i32 1 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B]], i32 1 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_B_1:%.*]] 
= sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[SEXT_A_1]], [[SEXT_B_1]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]] @@ -151,19 +143,19 @@ define i32 @multi_block_1(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[RES]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 br label %bb.1 bb.1: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %add = add i32 %mul.0, %mul.1 @@ -173,103 +165,97 @@ bb.1: ; TODO: Four smlads should be generated here, but mul.0 and mul.3 remain as ; scalars. -define i32 @num_load_limit(i16* %a, i16* %b, i32 %acc) { +define i32 @num_load_limit(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @num_load_limit( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP10]] -; CHECK-NEXT: [[ADDR_B_3:%.*]] = getelementptr i16, i16* [[B]], i32 3 -; CHECK-NEXT: [[LD_B_3:%.*]] = load i16, i16* [[ADDR_B_3]], align 2 +; CHECK-NEXT: [[ADDR_B_3:%.*]] = getelementptr i16, ptr [[B]], i32 3 +; CHECK-NEXT: [[LD_B_3:%.*]] = load i16, ptr [[ADDR_B_3]], align 2 ; CHECK-NEXT: [[SEXT_B_3:%.*]] = sext i16 [[LD_B_3]] to i32 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP6]], [[SEXT_B_3]] ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[MUL_3]], [[ACC:%.*]] ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[MUL_0]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[TMP12]]) -; CHECK-NEXT: [[ADDR_A_4:%.*]] = getelementptr i16, i16* [[A]], i32 4 -; CHECK-NEXT: [[ADDR_B_4:%.*]] = getelementptr i16, i16* [[B]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[ADDR_A_4]] to i32* -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 2 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i16* [[ADDR_B_4]] to i32* -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 2 +; CHECK-NEXT: [[ADDR_A_4:%.*]] = getelementptr i16, ptr [[A]], i32 4 +; CHECK-NEXT: [[ADDR_B_4:%.*]] = getelementptr i16, ptr [[B]], i32 4 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADDR_A_4]], align 2 +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADDR_B_4]], align 2 ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP13]]) -; CHECK-NEXT: [[ADDR_A_6:%.*]] = getelementptr i16, i16* [[A]], i32 6 -; 
CHECK-NEXT: [[ADDR_B_6:%.*]] = getelementptr i16, i16* [[B]], i32 6 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16* [[ADDR_A_6]] to i32* -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 2 -; CHECK-NEXT: [[TMP21:%.*]] = bitcast i16* [[ADDR_B_6]] to i32* -; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 2 +; CHECK-NEXT: [[ADDR_A_6:%.*]] = getelementptr i16, ptr [[A]], i32 6 +; CHECK-NEXT: [[ADDR_B_6:%.*]] = getelementptr i16, ptr [[B]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ADDR_A_6]], align 2 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ADDR_B_6]], align 2 ; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP18]]) ; CHECK-NEXT: ret i32 [[TMP23]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %add.0 = add i32 %mul.0, %mul.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 %sext.a.2 = sext i16 %ld.a.2 to i32 - %ld.b.2 = load i16, i16* %addr.b.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %addr.b.3 = getelementptr i16, i16* %b, i32 3 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %addr.b.3 = getelementptr i16, ptr %b, i32 3 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.3 = sext i16 %ld.a.3 to i32 - %ld.b.3 = load i16, i16* %addr.b.3 + %ld.b.3 = load i16, ptr %addr.b.3 %sext.b.3 = sext i16 %ld.b.3 to i32 %mul.3 = mul i32 %sext.a.1, %sext.b.3 %add.3 = add i32 %mul.2, %mul.3 - %addr.a.4 = getelementptr i16, i16* %a, i32 4 - %addr.b.4 = getelementptr i16, i16* %b, i32 4 - %ld.a.4 = load i16, i16* %addr.a.4 + %addr.a.4 = getelementptr i16, ptr %a, i32 4 + %addr.b.4 = getelementptr i16, ptr %b, i32 4 + %ld.a.4 = load i16, ptr %addr.a.4 %sext.a.4 = sext i16 %ld.a.4 to i32 - %ld.b.4 = load i16, i16* %addr.b.4 + %ld.b.4 = load i16, ptr %addr.b.4 %sext.b.4 = sext i16 %ld.b.4 to i32 %mul.4 = mul i32 %sext.a.4, %sext.b.4 - %addr.a.5 = getelementptr i16, i16* %a, i32 5 - %addr.b.5 = getelementptr i16, i16* %b, i32 5 - %ld.a.5 = load i16, i16* %addr.a.5 + %addr.a.5 = getelementptr i16, ptr %a, i32 5 + %addr.b.5 = getelementptr i16, ptr %b, i32 5 + %ld.a.5 = load i16, ptr %addr.a.5 %sext.a.5 = sext i16 %ld.a.5 to i32 - %ld.b.5 = load i16, i16* %addr.b.5 + %ld.b.5 = load i16, ptr %addr.b.5 %sext.b.5 = sext i16 %ld.b.5 to i32 %mul.5 = mul i32 %sext.a.5, %sext.b.5 %add.5 = add i32 %mul.4, %mul.5 - %addr.a.6 = getelementptr i16, i16* %a, i32 6 - %addr.b.6 = getelementptr i16, i16* %b, i32 6 - %ld.a.6 = load i16, i16* %addr.a.6 + %addr.a.6 = getelementptr i16, ptr %a, i32 6 + %addr.b.6 = getelementptr i16, ptr %b, i32 6 + %ld.a.6 = load i16, ptr 
%addr.a.6 %sext.a.6 = sext i16 %ld.a.6 to i32 - %ld.b.6 = load i16, i16* %addr.b.6 + %ld.b.6 = load i16, ptr %addr.b.6 %sext.b.6 = sext i16 %ld.b.6 to i32 %mul.6 = mul i32 %sext.a.6, %sext.b.6 - %addr.a.7 = getelementptr i16, i16* %a, i32 7 - %addr.b.7 = getelementptr i16, i16* %b, i32 7 - %ld.a.7 = load i16, i16* %addr.a.7 + %addr.a.7 = getelementptr i16, ptr %a, i32 7 + %addr.b.7 = getelementptr i16, ptr %b, i32 7 + %ld.a.7 = load i16, ptr %addr.a.7 %sext.a.7 = sext i16 %ld.a.7 to i32 - %ld.b.7 = load i16, i16* %addr.b.7 + %ld.b.7 = load i16, ptr %addr.b.7 %sext.b.7 = sext i16 %ld.b.7 to i32 %mul.7 = mul i32 %sext.a.7, %sext.b.7 %add.7 = add i32 %mul.6, %mul.7 @@ -281,63 +267,63 @@ entry: ret i32 %res } -define i32 @too_many_loads(i16* %a, i16* %b, i32 %acc) { +define i32 @too_many_loads(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @too_many_loads( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A:%.*]], align 2 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A:%.*]], align 2 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B:%.*]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[SEXT_A_0]], [[SEXT_B_0]] -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B]], i32 1 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B]], i32 1 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[SEXT_A_1]], [[SEXT_B_1]] ; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]] ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[SEXT_A_0]], [[SEXT_B_0]] -; CHECK-NEXT: [[ADDR_B_3:%.*]] = getelementptr i16, i16* [[B]], i32 3 -; CHECK-NEXT: [[LD_B_3:%.*]] = load i16, i16* [[ADDR_B_3]], align 2 +; CHECK-NEXT: [[ADDR_B_3:%.*]] = getelementptr i16, ptr [[B]], i32 3 +; CHECK-NEXT: [[LD_B_3:%.*]] = load i16, ptr [[ADDR_B_3]], align 2 ; CHECK-NEXT: [[SEXT_B_3:%.*]] = sext i16 [[LD_B_3]] to i32 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[SEXT_A_1]], [[SEXT_B_3]] ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[MUL_2]], [[MUL_3]] -; CHECK-NEXT: [[ADDR_A_4:%.*]] = getelementptr i16, i16* [[A]], i32 4 -; CHECK-NEXT: [[ADDR_B_4:%.*]] = getelementptr i16, i16* [[B]], i32 4 -; CHECK-NEXT: [[LD_A_4:%.*]] = load i16, i16* [[ADDR_A_4]], align 2 +; CHECK-NEXT: [[ADDR_A_4:%.*]] = getelementptr i16, ptr [[A]], i32 4 +; CHECK-NEXT: [[ADDR_B_4:%.*]] = getelementptr i16, ptr [[B]], i32 4 +; CHECK-NEXT: [[LD_A_4:%.*]] = load i16, ptr [[ADDR_A_4]], align 2 ; CHECK-NEXT: [[SEXT_A_4:%.*]] = sext i16 [[LD_A_4]] to i32 -; CHECK-NEXT: [[LD_B_4:%.*]] = load i16, i16* [[ADDR_B_4]], align 2 +; CHECK-NEXT: [[LD_B_4:%.*]] = load i16, ptr [[ADDR_B_4]], align 2 ; CHECK-NEXT: [[SEXT_B_4:%.*]] = sext i16 [[LD_B_4]] to i32 ; CHECK-NEXT: [[MUL_4:%.*]] = mul i32 [[SEXT_A_4]], [[SEXT_B_4]] -; CHECK-NEXT: [[ADDR_A_5:%.*]] = getelementptr i16, i16* [[A]], i32 5 -; CHECK-NEXT: [[ADDR_B_5:%.*]] = getelementptr i16, i16* [[B]], i32 5 -; CHECK-NEXT: 
[[LD_A_5:%.*]] = load i16, i16* [[ADDR_A_5]], align 2 +; CHECK-NEXT: [[ADDR_A_5:%.*]] = getelementptr i16, ptr [[A]], i32 5 +; CHECK-NEXT: [[ADDR_B_5:%.*]] = getelementptr i16, ptr [[B]], i32 5 +; CHECK-NEXT: [[LD_A_5:%.*]] = load i16, ptr [[ADDR_A_5]], align 2 ; CHECK-NEXT: [[SEXT_A_5:%.*]] = sext i16 [[LD_A_5]] to i32 -; CHECK-NEXT: [[LD_B_5:%.*]] = load i16, i16* [[ADDR_B_5]], align 2 +; CHECK-NEXT: [[LD_B_5:%.*]] = load i16, ptr [[ADDR_B_5]], align 2 ; CHECK-NEXT: [[SEXT_B_5:%.*]] = sext i16 [[LD_B_5]] to i32 ; CHECK-NEXT: [[MUL_5:%.*]] = mul i32 [[SEXT_A_5]], [[SEXT_B_5]] ; CHECK-NEXT: [[ADD_5:%.*]] = add i32 [[MUL_4]], [[MUL_5]] -; CHECK-NEXT: [[ADDR_A_6:%.*]] = getelementptr i16, i16* [[A]], i32 6 -; CHECK-NEXT: [[ADDR_B_6:%.*]] = getelementptr i16, i16* [[B]], i32 6 -; CHECK-NEXT: [[LD_A_6:%.*]] = load i16, i16* [[ADDR_A_6]], align 2 +; CHECK-NEXT: [[ADDR_A_6:%.*]] = getelementptr i16, ptr [[A]], i32 6 +; CHECK-NEXT: [[ADDR_B_6:%.*]] = getelementptr i16, ptr [[B]], i32 6 +; CHECK-NEXT: [[LD_A_6:%.*]] = load i16, ptr [[ADDR_A_6]], align 2 ; CHECK-NEXT: [[SEXT_A_6:%.*]] = sext i16 [[LD_A_6]] to i32 -; CHECK-NEXT: [[LD_B_6:%.*]] = load i16, i16* [[ADDR_B_6]], align 2 +; CHECK-NEXT: [[LD_B_6:%.*]] = load i16, ptr [[ADDR_B_6]], align 2 ; CHECK-NEXT: [[SEXT_B_6:%.*]] = sext i16 [[LD_B_6]] to i32 ; CHECK-NEXT: [[MUL_6:%.*]] = mul i32 [[SEXT_A_6]], [[SEXT_B_6]] -; CHECK-NEXT: [[ADDR_A_7:%.*]] = getelementptr i16, i16* [[A]], i32 7 -; CHECK-NEXT: [[ADDR_B_7:%.*]] = getelementptr i16, i16* [[B]], i32 7 -; CHECK-NEXT: [[LD_A_7:%.*]] = load i16, i16* [[ADDR_A_7]], align 2 +; CHECK-NEXT: [[ADDR_A_7:%.*]] = getelementptr i16, ptr [[A]], i32 7 +; CHECK-NEXT: [[ADDR_B_7:%.*]] = getelementptr i16, ptr [[B]], i32 7 +; CHECK-NEXT: [[LD_A_7:%.*]] = load i16, ptr [[ADDR_A_7]], align 2 ; CHECK-NEXT: [[SEXT_A_7:%.*]] = sext i16 [[LD_A_7]] to i32 -; CHECK-NEXT: [[LD_B_7:%.*]] = load i16, i16* [[ADDR_B_7]], align 2 +; CHECK-NEXT: [[LD_B_7:%.*]] = load i16, ptr [[ADDR_B_7]], align 2 ; CHECK-NEXT: [[SEXT_B_7:%.*]] = sext i16 [[LD_B_7]] to i32 ; CHECK-NEXT: [[MUL_7:%.*]] = mul i32 [[SEXT_A_7]], [[SEXT_B_7]] ; CHECK-NEXT: [[ADD_7:%.*]] = add i32 [[MUL_6]], [[MUL_7]] -; CHECK-NEXT: [[ADDR_A_8:%.*]] = getelementptr i16, i16* [[A]], i32 7 -; CHECK-NEXT: [[ADDR_B_8:%.*]] = getelementptr i16, i16* [[B]], i32 7 -; CHECK-NEXT: [[LD_A_8:%.*]] = load i16, i16* [[ADDR_A_8]], align 2 +; CHECK-NEXT: [[ADDR_A_8:%.*]] = getelementptr i16, ptr [[A]], i32 7 +; CHECK-NEXT: [[ADDR_B_8:%.*]] = getelementptr i16, ptr [[B]], i32 7 +; CHECK-NEXT: [[LD_A_8:%.*]] = load i16, ptr [[ADDR_A_8]], align 2 ; CHECK-NEXT: [[SEXT_A_8:%.*]] = sext i16 [[LD_A_8]] to i32 -; CHECK-NEXT: [[LD_B_8:%.*]] = load i16, i16* [[ADDR_B_8]], align 2 +; CHECK-NEXT: [[LD_B_8:%.*]] = load i16, ptr [[ADDR_B_8]], align 2 ; CHECK-NEXT: [[SEXT_B_8:%.*]] = sext i16 [[LD_B_8]] to i32 ; CHECK-NEXT: [[MUL_8:%.*]] = mul i32 [[SEXT_A_8]], [[SEXT_B_8]] ; CHECK-NEXT: [[ADD_10:%.*]] = add i32 [[ADD_7]], [[ADD_5]] @@ -348,73 +334,73 @@ define i32 @too_many_loads(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[RES]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, 
i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %add.0 = add i32 %mul.0, %mul.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 %sext.a.2 = sext i16 %ld.a.2 to i32 - %ld.b.2 = load i16, i16* %addr.b.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %addr.b.3 = getelementptr i16, i16* %b, i32 3 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %addr.b.3 = getelementptr i16, ptr %b, i32 3 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.3 = sext i16 %ld.a.3 to i32 - %ld.b.3 = load i16, i16* %addr.b.3 + %ld.b.3 = load i16, ptr %addr.b.3 %sext.b.3 = sext i16 %ld.b.3 to i32 %mul.3 = mul i32 %sext.a.1, %sext.b.3 %add.3 = add i32 %mul.2, %mul.3 - %addr.a.4 = getelementptr i16, i16* %a, i32 4 - %addr.b.4 = getelementptr i16, i16* %b, i32 4 - %ld.a.4 = load i16, i16* %addr.a.4 + %addr.a.4 = getelementptr i16, ptr %a, i32 4 + %addr.b.4 = getelementptr i16, ptr %b, i32 4 + %ld.a.4 = load i16, ptr %addr.a.4 %sext.a.4 = sext i16 %ld.a.4 to i32 - %ld.b.4 = load i16, i16* %addr.b.4 + %ld.b.4 = load i16, ptr %addr.b.4 %sext.b.4 = sext i16 %ld.b.4 to i32 %mul.4 = mul i32 %sext.a.4, %sext.b.4 - %addr.a.5 = getelementptr i16, i16* %a, i32 5 - %addr.b.5 = getelementptr i16, i16* %b, i32 5 - %ld.a.5 = load i16, i16* %addr.a.5 + %addr.a.5 = getelementptr i16, ptr %a, i32 5 + %addr.b.5 = getelementptr i16, ptr %b, i32 5 + %ld.a.5 = load i16, ptr %addr.a.5 %sext.a.5 = sext i16 %ld.a.5 to i32 - %ld.b.5 = load i16, i16* %addr.b.5 + %ld.b.5 = load i16, ptr %addr.b.5 %sext.b.5 = sext i16 %ld.b.5 to i32 %mul.5 = mul i32 %sext.a.5, %sext.b.5 %add.5 = add i32 %mul.4, %mul.5 - %addr.a.6 = getelementptr i16, i16* %a, i32 6 - %addr.b.6 = getelementptr i16, i16* %b, i32 6 - %ld.a.6 = load i16, i16* %addr.a.6 + %addr.a.6 = getelementptr i16, ptr %a, i32 6 + %addr.b.6 = getelementptr i16, ptr %b, i32 6 + %ld.a.6 = load i16, ptr %addr.a.6 %sext.a.6 = sext i16 %ld.a.6 to i32 - %ld.b.6 = load i16, i16* %addr.b.6 + %ld.b.6 = load i16, ptr %addr.b.6 %sext.b.6 = sext i16 %ld.b.6 to i32 %mul.6 = mul i32 %sext.a.6, %sext.b.6 - %addr.a.7 = getelementptr i16, i16* %a, i32 7 - %addr.b.7 = getelementptr i16, i16* %b, i32 7 - %ld.a.7 = load i16, i16* %addr.a.7 + %addr.a.7 = getelementptr i16, ptr %a, i32 7 + %addr.b.7 = getelementptr i16, ptr %b, i32 7 + %ld.a.7 = load i16, ptr %addr.a.7 %sext.a.7 = sext i16 %ld.a.7 to i32 - %ld.b.7 = load i16, i16* %addr.b.7 + %ld.b.7 = load i16, ptr %addr.b.7 %sext.b.7 = sext i16 %ld.b.7 to i32 %mul.7 = mul i32 %sext.a.7, %sext.b.7 %add.7 = add i32 %mul.6, %mul.7 - %addr.a.8 = getelementptr i16, i16* %a, i32 7 - %addr.b.8 = getelementptr i16, i16* %b, i32 7 - %ld.a.8 = load i16, i16* %addr.a.8 + %addr.a.8 = getelementptr i16, ptr %a, i32 7 + %addr.b.8 = getelementptr i16, ptr %b, i32 7 + %ld.a.8 = load i16, ptr %addr.a.8 %sext.a.8 = sext i16 %ld.a.8 to i32 - %ld.b.8 = load i16, i16* %addr.b.8 + %ld.b.8 = load i16, ptr %addr.b.8 %sext.b.8 = sext i16 %ld.b.8 to i32 %mul.8 = mul i32 %sext.a.8, %sext.b.8 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll 
b/llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll index 5fcef9d..4e5eaf7 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll @@ -1,31 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s -define i32 @exchange_1(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_1(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -36,13 +34,13 @@ define i32 @exchange_1(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP10]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 @@ -53,31 +51,29 @@ entry: ret i32 %res } -define i32 @exchange_2(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_2(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; 
CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -88,13 +84,13 @@ define i32 @exchange_2(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP10]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 @@ -105,31 +101,29 @@ entry: ret i32 %res } -define i32 @exchange_3(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_3(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; 
CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -140,13 +134,13 @@ define i32 @exchange_3(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP10]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 @@ -157,31 +151,29 @@ entry: ret i32 %res } -define i32 @exchange_4(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_4(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; 
CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -192,13 +184,13 @@ define i32 @exchange_4(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP10]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 @@ -209,49 +201,46 @@ entry: ret i32 %res } -define i32 @exchange_multi_use_1(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_multi_use_1(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_multi_use_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load 
i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP10]]) ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]] @@ -262,23 +251,23 @@ define i32 @exchange_multi_use_1(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP18]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.1 %mul.1 = mul i32 %sext.a.1, %sext.b.0 %add = add i32 %mul.0, %mul.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 %mul.2 = mul i32 %sext.a.3, %sext.b.1 @@ -289,49 +278,46 @@ entry: ret i32 %res } -define i64 @exchange_multi_use_64_1(i16* %a, i16* %b, i64 %acc) { +define i64 @exchange_multi_use_64_1(ptr %a, ptr %b, i64 %acc) { ; CHECK-LABEL: @exchange_multi_use_64_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; 
CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP1]], i32 [[TMP8]], i64 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP16]], i32 [[TMP8]], i64 [[TMP10]]) ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]] @@ -343,23 +329,23 @@ define i64 @exchange_multi_use_64_1(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: ret i64 [[TMP18]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = 
getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.1 %mul.1 = mul i32 %sext.a.1, %sext.b.0 %add = add i32 %mul.0, %mul.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 %mul.2 = mul i32 %sext.a.3, %sext.b.1 @@ -371,31 +357,29 @@ entry: ret i64 %res } -define i64 @exchange_multi_use_64_2(i16* %a, i16* %b, i64 %acc) { +define i64 @exchange_multi_use_64_2(ptr %a, ptr %b, i64 %acc) { ; CHECK-LABEL: @exchange_multi_use_64_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP1]], i32 [[TMP8]], i64 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -403,18 +387,17 @@ define i64 @exchange_multi_use_64_2(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]] ; CHECK-NEXT: [[SEXT_ADD:%.*]] = sext i32 
[[ADD]] to i64 -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP16]], i32 [[TMP8]], i64 [[TMP10]]) ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]] @@ -426,13 +409,13 @@ define i64 @exchange_multi_use_64_2(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: ret i64 [[TMP18]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 @@ -440,10 +423,10 @@ entry: %mul.1 = mul i32 %sext.a.1, %sext.b.0 %add = add i32 %mul.0, %mul.1 %sext.add = sext i32 %add to i64 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 %mul.2 = mul i32 %sext.a.3, %sext.b.1 @@ -455,49 +438,46 @@ entry: ret i64 %res } -define i32 @exchange_multi_use_2(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_multi_use_2(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_multi_use_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 
[[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP16]], i32 [[TMP10]]) ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP11]], [[TMP22]] @@ -508,23 +488,23 @@ define i32 @exchange_multi_use_2(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP18]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = 
sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %add = add i32 %mul.0, %mul.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 %mul.2 = mul i32 %sext.b.0, %sext.a.3 @@ -536,38 +516,36 @@ entry: } ; TODO: Why aren't two intrinsics generated? -define i32 @exchange_multi_use_3(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_multi_use_3(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_multi_use_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 0) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = 
sext i16 [[LD_A_3]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP3]], [[TMP14]] @@ -581,20 +559,20 @@ define i32 @exchange_multi_use_3(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[RES]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 %mul.2 = mul i32 %sext.b.0, %sext.a.3 @@ -609,38 +587,35 @@ entry: } ; TODO: Would it be better to generate a smlad and then sign extend it? -define i64 @exchange_multi_use_64_3(i16* %a, i16* %b, i64 %acc) { +define i64 @exchange_multi_use_64_3(ptr %a, ptr %b, i64 %acc) { ; CHECK-LABEL: @exchange_multi_use_64_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16 ; CHECK-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 -; CHECK-NEXT: [[ADDR_A_2:%.*]] = 
getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP8]], i32 [[TMP15]], i64 0) ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP8]], i64 [[TMP17]]) @@ -648,7 +623,7 @@ define i64 @exchange_multi_use_64_3(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP15]], 16 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP10]], [[TMP22]] @@ -664,20 +639,20 @@ define i64 @exchange_multi_use_64_3(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: ret i64 [[RES]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 %mul.2 = mul i32 %sext.b.0, %sext.a.3 @@ -694,38 +669,36 @@ entry: } ; TODO: Why isn't smladx generated too? 
-define i32 @exchange_multi_use_4(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_multi_use_4(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_multi_use_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 0) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP11]], [[SEXT_A_3]] @@ -739,20 +712,20 @@ define i32 @exchange_multi_use_4(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[RES]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + 
%ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 %mul.2 = mul i32 %sext.b.0, %sext.a.3 @@ -766,31 +739,29 @@ entry: ret i32 %res } -define i32 @exchange_swap(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_swap(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_swap( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -801,13 +772,13 @@ define i32 @exchange_swap(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP10]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext 
i16 %ld.b.0 to i32 @@ -818,31 +789,29 @@ entry: ret i32 %res } -define i32 @exchange_swap_2(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_swap_2(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_swap_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -853,13 +822,13 @@ define i32 @exchange_swap_2(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP10]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 @@ -870,31 +839,29 @@ entry: ret i32 %res } -define i32 @exchange_swap_3(i16* %a, i16* %b, i32 %acc) { +define i32 @exchange_swap_3(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @exchange_swap_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], 
align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 @@ -905,13 +872,13 @@ define i32 @exchange_swap_3(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP10]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/inner-full-unroll.ll b/llvm/test/CodeGen/ARM/ParallelDSP/inner-full-unroll.ll index 38f97b9..a6bff26 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/inner-full-unroll.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/inner-full-unroll.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=thumbv7em -arm-parallel-dsp -dce -S %s -o - | FileCheck %s -define void @full_unroll(i32* noalias nocapture %a, i16** noalias nocapture readonly %b, i16** noalias nocapture readonly %c, i32 %N) { +define void @full_unroll(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) { ; CHECK-LABEL: @full_unroll( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[N:%.*]], 0 @@ -10,24 +10,20 @@ define void @full_unroll(i32* noalias nocapture %a, i16** noalias nocapture read ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[I_030:%.*]] = phi i32 [ [[INC12:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_030]] -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16*, i16** [[B:%.*]], i32 [[I_030]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16*, i16** [[C:%.*]], i32 [[I_030]] -; CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[ARRAYIDX7]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP1]] to i32* -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_030]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds ptr, ptr [[B:%.*]], i32 [[I_030]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[C:%.*]], i32 [[I_030]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP5]], i32 [[TMP3]], i32 0) -; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[ARRAYIDX6_2]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 -; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[ARRAYIDX8_2]] to i32* -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 2 +; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 2 +; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX8_2]], align 2 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP10]], i32 [[TMP8]], i32 [[TMP6]]) -; CHECK-NEXT: store i32 [[TMP11]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC12]] = add nuw i32 [[I_030]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC12]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] @@ -41,47 +37,47 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.030 - %arrayidx5 = getelementptr inbounds i16*, i16** %b, i32 %i.030 - %0 = load i16*, i16** %arrayidx5, align 4 - %arrayidx7 = getelementptr inbounds i16*, i16** %c, i32 %i.030 - %1 = load i16*, i16** %arrayidx7, align 4 - %2 = load i16, i16* %0, align 2 + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.030 + %arrayidx5 = getelementptr inbounds ptr, ptr %b, i32 %i.030 + %0 = load ptr, ptr %arrayidx5, align 4 + %arrayidx7 = getelementptr inbounds ptr, ptr %c, i32 %i.030 + %1 = load ptr, ptr %arrayidx7, align 4 + %2 = load i16, ptr %0, align 2 %conv = sext i16 %2 to i32 - %3 = load i16, i16* %1, align 2 + %3 = load i16, ptr %1, align 2 %conv9 = sext i16 %3 to i32 %mul = mul nsw i32 %conv9, %conv - %arrayidx6.1 = getelementptr inbounds i16, i16* %0, i32 1 - %4 = load i16, i16* %arrayidx6.1, align 2 + %arrayidx6.1 = getelementptr inbounds i16, ptr %0, i32 1 + %4 = load i16, ptr %arrayidx6.1, align 2 
%conv.1 = sext i16 %4 to i32 - %arrayidx8.1 = getelementptr inbounds i16, i16* %1, i32 1 - %5 = load i16, i16* %arrayidx8.1, align 2 + %arrayidx8.1 = getelementptr inbounds i16, ptr %1, i32 1 + %5 = load i16, ptr %arrayidx8.1, align 2 %conv9.1 = sext i16 %5 to i32 %mul.1 = mul nsw i32 %conv9.1, %conv.1 %add.1 = add nsw i32 %mul.1, %mul - %arrayidx6.2 = getelementptr inbounds i16, i16* %0, i32 2 - %6 = load i16, i16* %arrayidx6.2, align 2 + %arrayidx6.2 = getelementptr inbounds i16, ptr %0, i32 2 + %6 = load i16, ptr %arrayidx6.2, align 2 %conv.2 = sext i16 %6 to i32 - %arrayidx8.2 = getelementptr inbounds i16, i16* %1, i32 2 - %7 = load i16, i16* %arrayidx8.2, align 2 + %arrayidx8.2 = getelementptr inbounds i16, ptr %1, i32 2 + %7 = load i16, ptr %arrayidx8.2, align 2 %conv9.2 = sext i16 %7 to i32 %mul.2 = mul nsw i32 %conv9.2, %conv.2 %add.2 = add nsw i32 %mul.2, %add.1 - %arrayidx6.3 = getelementptr inbounds i16, i16* %0, i32 3 - %8 = load i16, i16* %arrayidx6.3, align 2 + %arrayidx6.3 = getelementptr inbounds i16, ptr %0, i32 3 + %8 = load i16, ptr %arrayidx6.3, align 2 %conv.3 = sext i16 %8 to i32 - %arrayidx8.3 = getelementptr inbounds i16, i16* %1, i32 3 - %9 = load i16, i16* %arrayidx8.3, align 2 + %arrayidx8.3 = getelementptr inbounds i16, ptr %1, i32 3 + %9 = load i16, ptr %arrayidx8.3, align 2 %conv9.3 = sext i16 %9 to i32 %mul.3 = mul nsw i32 %conv9.3, %conv.3 %add.3 = add nsw i32 %mul.3, %add.2 - store i32 %add.3, i32* %arrayidx, align 4 + store i32 %add.3, ptr %arrayidx, align 4 %inc12 = add nuw i32 %i.030, 1 %exitcond = icmp eq i32 %inc12, %N br i1 %exitcond, label %for.cond.cleanup, label %for.body } -define void @full_unroll_sub(i32* noalias nocapture %a, i16** noalias nocapture readonly %b, i16** noalias nocapture readonly %c, i32 %N) { +define void @full_unroll_sub(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) { ; CHECK-LABEL: @full_unroll_sub( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[N:%.*]], 0 @@ -90,32 +86,30 @@ define void @full_unroll_sub(i32* noalias nocapture %a, i16** noalias nocapture ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[I_030:%.*]] = phi i32 [ [[INC12:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_030]] -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16*, i16** [[B:%.*]], i32 [[I_030]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16*, i16** [[C:%.*]], i32 [[I_030]] -; CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[ARRAYIDX7]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_030]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds ptr, ptr [[B:%.*]], i32 [[I_030]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[C:%.*]], i32 [[I_030]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP0]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[CONV9:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV9]], [[CONV]] -; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = 
getelementptr inbounds i16, i16* [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX6_1]], align 2 +; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX6_1]], align 2 ; CHECK-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX8_1]], align 2 +; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX8_1]], align 2 ; CHECK-NEXT: [[CONV9_1:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[CONV9_1]], [[CONV_1]] ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[SUB]] -; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX6_2]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 2 -; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[ARRAYIDX8_2]] to i32* -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 2 +; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 2 +; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX8_2]], align 2 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP9]], i32 [[TMP7]], i32 [[ADD_1]]) -; CHECK-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC12]] = add nuw i32 [[I_030]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC12]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] @@ -129,41 +123,41 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.030 - %arrayidx5 = getelementptr inbounds i16*, i16** %b, i32 %i.030 - %0 = load i16*, i16** %arrayidx5, align 4 - %arrayidx7 = getelementptr inbounds i16*, i16** %c, i32 %i.030 - %1 = load i16*, i16** %arrayidx7, align 4 - %2 = load i16, i16* %0, align 2 + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.030 + %arrayidx5 = getelementptr inbounds ptr, ptr %b, i32 %i.030 + %0 = load ptr, ptr %arrayidx5, align 4 + %arrayidx7 = getelementptr inbounds ptr, ptr %c, i32 %i.030 + %1 = load ptr, ptr %arrayidx7, align 4 + %2 = load i16, ptr %0, align 2 %conv = sext i16 %2 to i32 - %3 = load i16, i16* %1, align 2 + %3 = load i16, ptr %1, align 2 %conv9 = sext i16 %3 to i32 %sub = sub nsw i32 %conv9, %conv - %arrayidx6.1 = getelementptr inbounds i16, i16* %0, i32 1 - %4 = load i16, i16* %arrayidx6.1, align 2 + %arrayidx6.1 = getelementptr inbounds i16, ptr %0, i32 1 + %4 = load i16, ptr %arrayidx6.1, align 2 %conv.1 = sext i16 %4 to i32 - %arrayidx8.1 = getelementptr inbounds i16, i16* %1, i32 1 - %5 = load i16, i16* %arrayidx8.1, align 2 + %arrayidx8.1 = getelementptr inbounds i16, ptr %1, i32 1 + %5 = load i16, ptr %arrayidx8.1, align 2 %conv9.1 = sext i16 %5 to i32 %mul.1 = mul nsw i32 %conv9.1, %conv.1 %add.1 = add nsw i32 %mul.1, %sub - %arrayidx6.2 = getelementptr inbounds i16, i16* %0, i32 2 - %6 = load i16, i16* %arrayidx6.2, align 
2 + %arrayidx6.2 = getelementptr inbounds i16, ptr %0, i32 2 + %6 = load i16, ptr %arrayidx6.2, align 2 %conv.2 = sext i16 %6 to i32 - %arrayidx8.2 = getelementptr inbounds i16, i16* %1, i32 2 - %7 = load i16, i16* %arrayidx8.2, align 2 + %arrayidx8.2 = getelementptr inbounds i16, ptr %1, i32 2 + %7 = load i16, ptr %arrayidx8.2, align 2 %conv9.2 = sext i16 %7 to i32 %mul.2 = mul nsw i32 %conv9.2, %conv.2 %add.2 = add nsw i32 %mul.2, %add.1 - %arrayidx6.3 = getelementptr inbounds i16, i16* %0, i32 3 - %8 = load i16, i16* %arrayidx6.3, align 2 + %arrayidx6.3 = getelementptr inbounds i16, ptr %0, i32 3 + %8 = load i16, ptr %arrayidx6.3, align 2 %conv.3 = sext i16 %8 to i32 - %arrayidx8.3 = getelementptr inbounds i16, i16* %1, i32 3 - %9 = load i16, i16* %arrayidx8.3, align 2 + %arrayidx8.3 = getelementptr inbounds i16, ptr %1, i32 3 + %9 = load i16, ptr %arrayidx8.3, align 2 %conv9.3 = sext i16 %9 to i32 %mul.3 = mul nsw i32 %conv9.3, %conv.3 %add.3 = add nsw i32 %mul.3, %add.2 - store i32 %add.3, i32* %arrayidx, align 4 + store i32 %add.3, ptr %arrayidx, align 4 %inc12 = add nuw i32 %i.030, 1 %exitcond = icmp eq i32 %inc12, %N br i1 %exitcond, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/overlapping.ll b/llvm/test/CodeGen/ARM/ParallelDSP/overlapping.ll index 09fcf33..98a72cb 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/overlapping.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/overlapping.ll @@ -1,40 +1,36 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s -define i32 @overlap_1(i16* %a, i16* %b, i32 %acc) { +define i32 @overlap_1(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @overlap_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_A_1]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, 
i32* [[TMP15]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_1]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP16]], 16 ; CHECK-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i16 ; CHECK-NEXT: [[TMP21:%.*]] = sext i16 [[TMP20]] to i32 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i16* [[ADDR_B_1]] to i32* -; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16 ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP16]], i32 [[TMP23]], i32 [[TMP10]]) ; CHECK-NEXT: [[TMP26:%.*]] = sext i16 [[TMP24]] to i32 @@ -46,10 +42,10 @@ define i32 @overlap_1(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, i16* [[ADDR_B_2]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, ptr [[ADDR_B_2]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_B_2:%.*]] = sext i16 [[LD_B_2]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP21]], [[TMP29]] @@ -60,22 +56,22 @@ define i32 @overlap_1(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP25]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 %mul.1 = mul i32 %sext.a.1, %sext.b.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.b.2 = load i16, i16* %addr.b.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.a.2, %sext.b.2 @@ -88,40 +84,36 @@ entry: ; TODO: Is it really best to generate smlald for the first instruction? Does ; this just increase register pressure unnecessarily? 
-define i64 @overlap_64_1(i16* %a, i16* %b, i64 %acc) { +define i64 @overlap_64_1(ptr %a, ptr %b, i64 %acc) { ; CHECK-LABEL: @overlap_64_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP8]], i64 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_A_1]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_1]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP16]], 16 ; CHECK-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i16 ; CHECK-NEXT: [[TMP21:%.*]] = sext i16 [[TMP20]] to i32 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i16* [[ADDR_B_1]] to i32* -; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16 ; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP16]], i32 [[TMP23]], i64 [[TMP10]]) ; CHECK-NEXT: [[TMP26:%.*]] = sext i16 [[TMP24]] to i32 @@ -133,10 +125,10 @@ define i64 @overlap_64_1(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, i16* [[ADDR_B_2]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: 
[[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, ptr [[ADDR_B_2]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_B_2:%.*]] = sext i16 [[LD_B_2]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP21]], [[TMP29]] @@ -149,22 +141,22 @@ define i64 @overlap_64_1(i16* %a, i16* %b, i64 %acc) { ; CHECK-NEXT: ret i64 [[TMP25]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 %mul.1 = mul i32 %sext.a.1, %sext.b.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.b.2 = load i16, i16* %addr.b.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.a.2, %sext.b.2 @@ -177,40 +169,38 @@ entry: ret i64 %res } -define i32 @overlap_2(i16* %a, i16* %b, i32 %acc) { +define i32 @overlap_2(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @overlap_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16 ; CHECK-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load 
i16, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP10]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[MUL_1]], [[ACC:%.*]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, i16* [[ADDR_B_2]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, ptr [[ADDR_B_2]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_B_2:%.*]] = sext i16 [[LD_B_2]] to i32 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[SEXT_B_2]], [[SEXT_A_2]] @@ -223,22 +213,22 @@ define i32 @overlap_2(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP16]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 %mul.1 = mul i32 %sext.a.1, %sext.b.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.b.2 = load i16, i16* %addr.b.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.b.2, %sext.a.2 @@ -249,33 +239,30 @@ entry: ret i32 %res } -define i32 @overlap_3(i16* %a, i16* %b, i32 %acc) { +define i32 @overlap_3(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @overlap_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; 
CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_B_1]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP16]], 16 @@ -286,20 +273,19 @@ define i32 @overlap_3(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16 ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP23]], i32 [[TMP16]], i32 [[TMP10]]) ; CHECK-NEXT: [[TMP26:%.*]] = sext i16 [[TMP24]] to i32 ; CHECK-NEXT: [[TMP27:%.*]] = lshr i32 [[TMP23]], 16 ; CHECK-NEXT: [[TMP28:%.*]] = trunc i32 [[TMP27]] to i16 ; CHECK-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, i16* [[ADDR_B_2]], align 2 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, ptr [[ADDR_B_2]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_B_2:%.*]] = sext i16 [[LD_B_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 @@ -312,24 +298,24 @@ define i32 @overlap_3(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP25]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* 
%addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 %mul.1 = mul i32 %sext.a.1, %sext.b.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.b.2 = load i16, i16* %addr.b.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.b.2 = load i16, ptr %addr.b.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.b.2 = sext i16 %ld.b.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 @@ -342,33 +328,30 @@ entry: ret i32 %res } -define i32 @overlap_4(i16* %a, i16* %b, i32 %acc) { +define i32 @overlap_4(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @overlap_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1 -; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, i16* [[A]], align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 +; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1 +; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32 -; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, i16* [[B]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[B]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, i16* [[ADDR_A_1]], align 2 -; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, i16* [[ADDR_B_1]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16* [[ADDR_B_1]] to i32* -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 2 +; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2 +; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_B_1]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-NEXT: [[TMP18:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP16]], 16 @@ -379,20 +362,19 @@ define i32 @overlap_4(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 
[[TMP3]], [[TMP11]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]] -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, i16* [[A]], i32 3 -; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, i16* [[ADDR_A_2]], align 2 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3 +; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 ; CHECK-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16 ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP23]], i32 [[TMP16]], i32 [[TMP10]]) ; CHECK-NEXT: [[TMP26:%.*]] = sext i16 [[TMP24]] to i32 ; CHECK-NEXT: [[TMP27:%.*]] = lshr i32 [[TMP23]], 16 ; CHECK-NEXT: [[TMP28:%.*]] = trunc i32 [[TMP27]] to i16 ; CHECK-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, i16* [[ADDR_B_2]], align 2 -; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, i16* [[ADDR_A_3]], align 2 +; CHECK-NEXT: [[LD_B_2:%.*]] = load i16, ptr [[ADDR_B_2]], align 2 +; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32 ; CHECK-NEXT: [[SEXT_B_2:%.*]] = sext i16 [[LD_B_2]] to i32 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32 @@ -405,24 +387,24 @@ define i32 @overlap_4(i16* %a, i16* %b, i32 %acc) { ; CHECK-NEXT: ret i32 [[TMP25]] ; entry: - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.0 = load i16, i16* %a + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b - %ld.a.1 = load i16, i16* %addr.a.1 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.0 = load i16, ptr %b + %ld.a.1 = load i16, ptr %addr.a.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.a.1 = sext i16 %ld.a.1 to i32 %sext.b.1 = sext i16 %ld.b.1 to i32 %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 %mul.1 = mul i32 %sext.a.1, %sext.b.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %ld.a.2 = load i16, i16* %addr.a.2 - %ld.b.2 = load i16, i16* %addr.b.2 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %ld.a.2 = load i16, ptr %addr.a.2 + %ld.b.2 = load i16, ptr %addr.b.2 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.2 = sext i16 %ld.a.2 to i32 %sext.b.2 = sext i16 %ld.b.2 to i32 %sext.a.3 = sext i16 %ld.a.3 to i32 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll b/llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll index caf5bb3..56aae8d 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll @@ -1,238 +1,226 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=thumbv7-unknown-linux-gnueabihf -arm-parallel-dsp -dce %s -S -o - | FileCheck %s -define i32 @first_mul_invalid(i16* nocapture 
readonly %in, i16* nocapture readonly %b) { +define i32 @first_mul_invalid(ptr nocapture readonly %in, ptr nocapture readonly %b) { ; CHECK-LABEL: @first_mul_invalid( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[IN:%.*]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[B:%.*]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]]) -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[IN]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP2]] to i32 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2 ; CHECK-NEXT: [[CONV6:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[CALL]] -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[IN]], i32 -3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX13]] to i32* -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX9]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 2 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2 +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP5]], i32 [[TMP7]], i32 [[ADD]]) -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, i16* [[IN]], i32 -5 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[ARRAYIDX25]] to i32* -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 2 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 4 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16* [[ARRAYIDX21]] to i32* -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 2 +; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2 +; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2 ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP10]], i32 [[TMP12]], i32 [[TMP8]]) ; CHECK-NEXT: ret i32 [[TMP13]] ; entry: - %0 = load i16, i16* %in, align 2 + %0 = load i16, ptr %in, align 2 %conv = sext i16 %0 to i32 - %1 = load i16, i16* %b, align 2 + %1 = load i16, ptr %b, align 2 %conv2 = sext i16 %1 to i32 %call = tail call i32 @bar(i32 %conv, i32 %conv2) - %arrayidx3 = getelementptr inbounds i16, i16* %in, i32 -1 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1 + %2 = load 
i16, ptr %arrayidx3, align 2 %conv4 = sext i16 %2 to i32 - %arrayidx5 = getelementptr inbounds i16, i16* %b, i32 1 - %3 = load i16, i16* %arrayidx5, align 2 + %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1 + %3 = load i16, ptr %arrayidx5, align 2 %conv6 = sext i16 %3 to i32 %mul = mul nsw i32 %conv6, %conv4 %add = add i32 %mul, %call - %arrayidx7 = getelementptr inbounds i16, i16* %in, i32 -2 - %4 = load i16, i16* %arrayidx7, align 2 + %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2 + %4 = load i16, ptr %arrayidx7, align 2 %conv8 = sext i16 %4 to i32 - %arrayidx9 = getelementptr inbounds i16, i16* %b, i32 2 - %5 = load i16, i16* %arrayidx9, align 2 + %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2 + %5 = load i16, ptr %arrayidx9, align 2 %conv10 = sext i16 %5 to i32 %mul11 = mul nsw i32 %conv10, %conv8 %add12 = add i32 %add, %mul11 - %arrayidx13 = getelementptr inbounds i16, i16* %in, i32 -3 - %6 = load i16, i16* %arrayidx13, align 2 + %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3 + %6 = load i16, ptr %arrayidx13, align 2 %conv14 = sext i16 %6 to i32 - %arrayidx15 = getelementptr inbounds i16, i16* %b, i32 3 - %7 = load i16, i16* %arrayidx15, align 2 + %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3 + %7 = load i16, ptr %arrayidx15, align 2 %conv16 = sext i16 %7 to i32 %mul17 = mul nsw i32 %conv16, %conv14 %add18 = add i32 %add12, %mul17 - %arrayidx19 = getelementptr inbounds i16, i16* %in, i32 -4 - %8 = load i16, i16* %arrayidx19, align 2 + %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4 + %8 = load i16, ptr %arrayidx19, align 2 %conv20 = sext i16 %8 to i32 - %arrayidx21 = getelementptr inbounds i16, i16* %b, i32 4 - %9 = load i16, i16* %arrayidx21, align 2 + %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4 + %9 = load i16, ptr %arrayidx21, align 2 %conv22 = sext i16 %9 to i32 %mul23 = mul nsw i32 %conv22, %conv20 %add24 = add i32 %add18, %mul23 - %arrayidx25 = getelementptr inbounds i16, i16* %in, i32 -5 - %10 = load i16, i16* %arrayidx25, align 2 + %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5 + %10 = load i16, ptr %arrayidx25, align 2 %conv26 = sext i16 %10 to i32 - %arrayidx27 = getelementptr inbounds i16, i16* %b, i32 5 - %11 = load i16, i16* %arrayidx27, align 2 + %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5 + %11 = load i16, ptr %arrayidx27, align 2 %conv28 = sext i16 %11 to i32 %mul29 = mul nsw i32 %conv28, %conv26 %add30 = add i32 %add24, %mul29 ret i32 %add30 } -define i32 @with_no_acc_input(i16* nocapture readonly %in, i16* nocapture readonly %b) { +define i32 @with_no_acc_input(ptr nocapture readonly %in, ptr nocapture readonly %b) { ; CHECK-LABEL: @with_no_acc_input( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[IN:%.*]], i32 -1 -; CHECK-NEXT: [[LD_2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN:%.*]], i32 -1 +; CHECK-NEXT: [[LD_2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[LD_2]] to i32 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 1 -; CHECK-NEXT: [[LD_3:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[LD_3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2 ; CHECK-NEXT: [[CONV6:%.*]] = sext i16 [[LD_3]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]] -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = 
getelementptr inbounds i16, i16* [[IN]], i32 -3 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[ARRAYIDX13]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[ARRAYIDX9]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2 +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP3]], i32 [[MUL]]) -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, i16* [[IN]], i32 -5 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[ARRAYIDX25]] to i32* -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 2 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[ARRAYIDX21]] to i32* -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 2 +; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2 +; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP6]], i32 [[TMP8]], i32 [[TMP4]]) ; CHECK-NEXT: ret i32 [[TMP9]] ; entry: - %arrayidx3 = getelementptr inbounds i16, i16* %in, i32 -1 - %ld.2 = load i16, i16* %arrayidx3, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1 + %ld.2 = load i16, ptr %arrayidx3, align 2 %conv4 = sext i16 %ld.2 to i32 - %arrayidx5 = getelementptr inbounds i16, i16* %b, i32 1 - %ld.3 = load i16, i16* %arrayidx5, align 2 + %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1 + %ld.3 = load i16, ptr %arrayidx5, align 2 %conv6 = sext i16 %ld.3 to i32 %mul = mul nsw i32 %conv6, %conv4 - %arrayidx7 = getelementptr inbounds i16, i16* %in, i32 -2 - %ld.4 = load i16, i16* %arrayidx7, align 2 + %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2 + %ld.4 = load i16, ptr %arrayidx7, align 2 %conv8 = sext i16 %ld.4 to i32 - %arrayidx9 = getelementptr inbounds i16, i16* %b, i32 2 - %ld.5 = load i16, i16* %arrayidx9, align 2 + %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2 + %ld.5 = load i16, ptr %arrayidx9, align 2 %conv10 = sext i16 %ld.5 to i32 %mul11 = mul nsw i32 %conv10, %conv8 %add12 = add i32 %mul, %mul11 - %arrayidx13 = getelementptr inbounds i16, i16* %in, i32 -3 - %ld.6 = load i16, i16* %arrayidx13, align 2 + %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3 + %ld.6 = load i16, ptr %arrayidx13, align 2 %conv14 = sext i16 %ld.6 to i32 - %arrayidx15 = getelementptr inbounds i16, i16* %b, i32 3 - %ld.7 = load i16, i16* %arrayidx15, align 2 + %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3 + %ld.7 = load i16, ptr %arrayidx15, align 2 %conv16 = sext i16 %ld.7 to i32 %mul17 = mul nsw i32 %conv16, %conv14 %add18 = add i32 %add12, %mul17 - %arrayidx19 = getelementptr inbounds i16, i16* %in, i32 -4 - %ld.8 = load i16, i16* %arrayidx19, align 2 + %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4 + %ld.8 = load i16, ptr %arrayidx19, align 2 %conv20 = sext i16 %ld.8 to i32 - %arrayidx21 = getelementptr inbounds i16, i16* %b, i32 4 - 
%ld.9 = load i16, i16* %arrayidx21, align 2 + %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4 + %ld.9 = load i16, ptr %arrayidx21, align 2 %conv22 = sext i16 %ld.9 to i32 %mul23 = mul nsw i32 %conv22, %conv20 %add24 = add i32 %add18, %mul23 - %arrayidx25 = getelementptr inbounds i16, i16* %in, i32 -5 - %ld.10 = load i16, i16* %arrayidx25, align 2 + %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5 + %ld.10 = load i16, ptr %arrayidx25, align 2 %conv26 = sext i16 %ld.10 to i32 - %arrayidx27 = getelementptr inbounds i16, i16* %b, i32 5 - %ld.11 = load i16, i16* %arrayidx27, align 2 + %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5 + %ld.11 = load i16, ptr %arrayidx27, align 2 %conv28 = sext i16 %ld.11 to i32 %mul29 = mul nsw i32 %conv28, %conv26 %add30 = add i32 %add24, %mul29 ret i32 %add30 } -define i64 @with_64bit_acc(i16* nocapture readonly %in, i16* nocapture readonly %b) { +define i64 @with_64bit_acc(ptr nocapture readonly %in, ptr nocapture readonly %b) { ; CHECK-LABEL: @with_64bit_acc( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[IN:%.*]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[B:%.*]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]]) ; CHECK-NEXT: [[SEXT_0:%.*]] = sext i32 [[CALL]] to i64 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[IN]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX3]], align 2 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP2]] to i32 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2 ; CHECK-NEXT: [[CONV6:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]] ; CHECK-NEXT: [[SEXT_1:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[SEXT_0]], [[SEXT_1]] -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[IN]], i32 -3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX13]] to i32* -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX9]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 2 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2 +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP7]], i64 [[ADD]]) -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, i16* [[IN]], i32 -5 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[ARRAYIDX25]] to i32* -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 2 -; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 4 -; 
CHECK-NEXT: [[TMP11:%.*]] = bitcast i16* [[ARRAYIDX21]] to i32* -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 2 +; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2 +; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2 ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP10]], i32 [[TMP12]], i64 [[TMP8]]) ; CHECK-NEXT: ret i64 [[TMP13]] ; entry: - %0 = load i16, i16* %in, align 2 + %0 = load i16, ptr %in, align 2 %conv = sext i16 %0 to i32 - %1 = load i16, i16* %b, align 2 + %1 = load i16, ptr %b, align 2 %conv2 = sext i16 %1 to i32 %call = tail call i32 @bar(i32 %conv, i32 %conv2) %sext.0 = sext i32 %call to i64 - %arrayidx3 = getelementptr inbounds i16, i16* %in, i32 -1 - %2 = load i16, i16* %arrayidx3, align 2 + %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1 + %2 = load i16, ptr %arrayidx3, align 2 %conv4 = sext i16 %2 to i32 - %arrayidx5 = getelementptr inbounds i16, i16* %b, i32 1 - %3 = load i16, i16* %arrayidx5, align 2 + %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1 + %3 = load i16, ptr %arrayidx5, align 2 %conv6 = sext i16 %3 to i32 %mul = mul nsw i32 %conv6, %conv4 %sext.1 = sext i32 %mul to i64 %add = add i64 %sext.0, %sext.1 - %arrayidx7 = getelementptr inbounds i16, i16* %in, i32 -2 - %4 = load i16, i16* %arrayidx7, align 2 + %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2 + %4 = load i16, ptr %arrayidx7, align 2 %conv8 = sext i16 %4 to i32 - %arrayidx9 = getelementptr inbounds i16, i16* %b, i32 2 - %5 = load i16, i16* %arrayidx9, align 2 + %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2 + %5 = load i16, ptr %arrayidx9, align 2 %conv10 = sext i16 %5 to i32 %mul11 = mul nsw i32 %conv10, %conv8 %sext.2 = sext i32 %mul11 to i64 %add12 = add i64 %add, %sext.2 - %arrayidx13 = getelementptr inbounds i16, i16* %in, i32 -3 - %6 = load i16, i16* %arrayidx13, align 2 + %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3 + %6 = load i16, ptr %arrayidx13, align 2 %conv14 = sext i16 %6 to i32 - %arrayidx15 = getelementptr inbounds i16, i16* %b, i32 3 - %7 = load i16, i16* %arrayidx15, align 2 + %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3 + %7 = load i16, ptr %arrayidx15, align 2 %conv16 = sext i16 %7 to i32 %mul17 = mul nsw i32 %conv16, %conv14 %sext.3 = sext i32 %mul17 to i64 %add18 = add i64 %add12, %sext.3 - %arrayidx19 = getelementptr inbounds i16, i16* %in, i32 -4 - %8 = load i16, i16* %arrayidx19, align 2 + %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4 + %8 = load i16, ptr %arrayidx19, align 2 %conv20 = sext i16 %8 to i32 - %arrayidx21 = getelementptr inbounds i16, i16* %b, i32 4 - %9 = load i16, i16* %arrayidx21, align 2 + %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4 + %9 = load i16, ptr %arrayidx21, align 2 %conv22 = sext i16 %9 to i32 %mul23 = mul nsw i32 %conv22, %conv20 %sext.4 = sext i32 %mul23 to i64 %add24 = add i64 %add18, %sext.4 - %arrayidx25 = getelementptr inbounds i16, i16* %in, i32 -5 - %10 = load i16, i16* %arrayidx25, align 2 + %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5 + %10 = load i16, ptr %arrayidx25, align 2 %conv26 = sext i16 %10 to i32 - %arrayidx27 = getelementptr inbounds i16, i16* %b, i32 5 - %11 = load i16, i16* %arrayidx27, align 2 + %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5 + %11 = load i16, ptr %arrayidx27, align 2 %conv28 = sext 
i16 %11 to i32 %mul29 = mul nsw i32 %conv28, %conv26 %sext.5 = sext i32 %mul29 to i64 @@ -240,34 +228,32 @@ entry: ret i64 %add30 } -define i64 @with_64bit_add_acc(i16* nocapture readonly %px.10756.unr, i16* nocapture readonly %py.8757.unr, i32 %acc) { +define i64 @with_64bit_add_acc(ptr nocapture readonly %px.10756.unr, ptr nocapture readonly %py.8757.unr, i32 %acc) { ; CHECK-LABEL: @with_64bit_add_acc( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUM_3758_UNR:%.*]] = sext i32 [[ACC:%.*]] to i64 ; CHECK-NEXT: br label [[BB_1:%.*]] ; CHECK: bb.1: -; CHECK-NEXT: [[INCDEC_PTR184_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PX_10756_UNR:%.*]], i32 1 -; CHECK-NEXT: [[TMP216:%.*]] = load i16, i16* [[PX_10756_UNR]], align 2 +; CHECK-NEXT: [[INCDEC_PTR184_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR:%.*]], i32 1 +; CHECK-NEXT: [[TMP216:%.*]] = load i16, ptr [[PX_10756_UNR]], align 2 ; CHECK-NEXT: [[CONV185_EPIL:%.*]] = sext i16 [[TMP216]] to i32 -; CHECK-NEXT: [[INCDEC_PTR186_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PY_8757_UNR:%.*]], i32 -1 -; CHECK-NEXT: [[TMP217:%.*]] = load i16, i16* [[PY_8757_UNR]], align 2 +; CHECK-NEXT: [[INCDEC_PTR186_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR:%.*]], i32 -1 +; CHECK-NEXT: [[TMP217:%.*]] = load i16, ptr [[PY_8757_UNR]], align 2 ; CHECK-NEXT: [[CONV187_EPIL:%.*]] = sext i16 [[TMP217]] to i32 ; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[CONV187_EPIL]], [[CONV185_EPIL]] ; CHECK-NEXT: [[CONV188_EPIL:%.*]] = sext i32 [[MUL_EPIL]] to i64 ; CHECK-NEXT: [[ADD189_EPIL:%.*]] = add nsw i64 [[SUM_3758_UNR]], [[CONV188_EPIL]] -; CHECK-NEXT: [[INCDEC_PTR190_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PX_10756_UNR]], i32 2 -; CHECK-NEXT: [[TMP218:%.*]] = load i16, i16* [[INCDEC_PTR184_EPIL]], align 2 +; CHECK-NEXT: [[INCDEC_PTR190_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR]], i32 2 +; CHECK-NEXT: [[TMP218:%.*]] = load i16, ptr [[INCDEC_PTR184_EPIL]], align 2 ; CHECK-NEXT: [[CONV191_EPIL:%.*]] = sext i16 [[TMP218]] to i32 -; CHECK-NEXT: [[TMP219:%.*]] = load i16, i16* [[INCDEC_PTR186_EPIL]], align 2 +; CHECK-NEXT: [[TMP219:%.*]] = load i16, ptr [[INCDEC_PTR186_EPIL]], align 2 ; CHECK-NEXT: [[CONV193_EPIL:%.*]] = sext i16 [[TMP219]] to i32 ; CHECK-NEXT: [[MUL194_EPIL:%.*]] = mul nsw i32 [[CONV193_EPIL]], [[CONV191_EPIL]] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[MUL194_EPIL]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ADD189_EPIL]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[INCDEC_PTR190_EPIL]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 -; CHECK-NEXT: [[INCDEC_PTR199_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PY_8757_UNR]], i32 -3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[INCDEC_PTR199_EPIL]] to i32* -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR190_EPIL]], align 2 +; CHECK-NEXT: [[INCDEC_PTR199_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR199_EPIL]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP3]], i64 [[TMP1]]) ; CHECK-NEXT: ret i64 [[TMP6]] ; @@ -276,36 +262,36 @@ entry: br label %bb.1 bb.1: - %incdec.ptr184.epil = getelementptr inbounds i16, i16* %px.10756.unr, i32 1 - %tmp216 = load i16, i16* %px.10756.unr, align 2 + %incdec.ptr184.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 1 + %tmp216 = load i16, ptr %px.10756.unr, align 2 %conv185.epil = sext i16 %tmp216 
to i32 - %incdec.ptr186.epil = getelementptr inbounds i16, i16* %py.8757.unr, i32 -1 - %tmp217 = load i16, i16* %py.8757.unr, align 2 + %incdec.ptr186.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -1 + %tmp217 = load i16, ptr %py.8757.unr, align 2 %conv187.epil = sext i16 %tmp217 to i32 %mul.epil = mul nsw i32 %conv187.epil, %conv185.epil %conv188.epil = sext i32 %mul.epil to i64 %add189.epil = add nsw i64 %sum.3758.unr, %conv188.epil - %incdec.ptr190.epil = getelementptr inbounds i16, i16* %px.10756.unr, i32 2 - %tmp218 = load i16, i16* %incdec.ptr184.epil, align 2 + %incdec.ptr190.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 2 + %tmp218 = load i16, ptr %incdec.ptr184.epil, align 2 %conv191.epil = sext i16 %tmp218 to i32 - %incdec.ptr192.epil = getelementptr inbounds i16, i16* %py.8757.unr, i32 -2 - %tmp219 = load i16, i16* %incdec.ptr186.epil, align 2 + %incdec.ptr192.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -2 + %tmp219 = load i16, ptr %incdec.ptr186.epil, align 2 %conv193.epil = sext i16 %tmp219 to i32 %mul194.epil = mul nsw i32 %conv193.epil, %conv191.epil %conv195.epil = sext i32 %mul194.epil to i64 %add196.epil = add nsw i64 %add189.epil, %conv195.epil - %incdec.ptr197.epil = getelementptr inbounds i16, i16* %px.10756.unr, i32 3 - %tmp220 = load i16, i16* %incdec.ptr190.epil, align 2 + %incdec.ptr197.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 3 + %tmp220 = load i16, ptr %incdec.ptr190.epil, align 2 %conv198.epil = sext i16 %tmp220 to i32 - %incdec.ptr199.epil = getelementptr inbounds i16, i16* %py.8757.unr, i32 -3 - %tmp221 = load i16, i16* %incdec.ptr192.epil, align 2 + %incdec.ptr199.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -3 + %tmp221 = load i16, ptr %incdec.ptr192.epil, align 2 %conv200.epil = sext i16 %tmp221 to i32 %mul201.epil = mul nsw i32 %conv200.epil, %conv198.epil %conv202.epil = sext i32 %mul201.epil to i64 %add203.epil = add nsw i64 %add196.epil, %conv202.epil - %tmp222 = load i16, i16* %incdec.ptr197.epil, align 2 + %tmp222 = load i16, ptr %incdec.ptr197.epil, align 2 %conv205.epil = sext i16 %tmp222 to i32 - %tmp223 = load i16, i16* %incdec.ptr199.epil, align 2 + %tmp223 = load i16, ptr %incdec.ptr199.epil, align 2 %conv207.epil = sext i16 %tmp223 to i32 %mul208.epil = mul nsw i32 %conv207.epil, %conv205.epil %conv209.epil = sext i32 %mul208.epil to i64 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll b/llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll index 6974a00..fdbe85b 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll @@ -1,28 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s -define i64 @sext_acc_1(i16* %a, i16* %b, i32 %acc) { +define i64 @sext_acc_1(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @sext_acc_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ACC:%.*]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP4]]) ; CHECK-NEXT: ret i64 [[TMP5]] ; entry: - %ld.a.0 = load i16, i16* %a + 
%ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %sext.mul.0 = sext i32 %mul.0 to i64 @@ -33,35 +31,31 @@ entry: ret i64 %res } -define i64 @sext_acc_2(i16* %a, i16* %b, i32 %acc) { +define i64 @sext_acc_2(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @sext_acc_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ADDR_B_2]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2 ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]]) ; CHECK-NEXT: ret i64 [[TMP10]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %sext.mul.0 = sext i32 %mul.0 to i64 @@ -69,19 +63,19 @@ entry: %add = add i64 %sext.mul.0, %sext.mul.1 %sext.acc = sext i32 %acc to i64 %add.1 = add i64 %add, %sext.acc - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 %sext.a.2 = sext i16 %ld.a.2 to i32 - %ld.b.2 = load i16, i16* %addr.b.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.a.2, %sext.b.2 %sext.mul.2 = sext i32 %mul.2 to i64 - %addr.a.3 = 
getelementptr i16, i16* %a, i32 3 - %addr.b.3 = getelementptr i16, i16* %b, i32 3 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %addr.b.3 = getelementptr i16, ptr %b, i32 3 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.3 = sext i16 %ld.a.3 to i32 - %ld.b.3 = load i16, i16* %addr.b.3 + %ld.b.3 = load i16, ptr %addr.b.3 %sext.b.3 = sext i16 %ld.b.3 to i32 %mul.3 = mul i32 %sext.a.3, %sext.b.3 %sext.mul.3 = sext i32 %mul.3 to i64 @@ -90,53 +84,49 @@ entry: ret i64 %add.3 } -define i64 @sext_acc_3(i16* %a, i16* %b, i32 %acc) { +define i64 @sext_acc_3(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @sext_acc_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ADDR_B_2]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2 ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]]) ; CHECK-NEXT: ret i64 [[TMP10]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %sext.mul.0 = sext i32 %mul.0 to i64 %sext.mul.1 = sext i32 %mul.1 to i64 %add = add i64 %sext.mul.0, %sext.mul.1 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 %sext.a.2 = sext i16 %ld.a.2 to i32 - %ld.b.2 = load i16, i16* %addr.b.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.a.2, %sext.b.2 %sext.mul.2 = sext i32 %mul.2 to i64 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %addr.b.3 = getelementptr i16, i16* %b, i32 3 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %addr.b.3 = getelementptr i16, ptr %b, i32 3 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.3 = sext 
i16 %ld.a.3 to i32 - %ld.b.3 = load i16, i16* %addr.b.3 + %ld.b.3 = load i16, ptr %addr.b.3 %sext.b.3 = sext i16 %ld.b.3 to i32 %mul.3 = mul i32 %sext.a.3, %sext.b.3 %sext.mul.3 = sext i32 %mul.3 to i64 @@ -147,52 +137,48 @@ entry: ret i64 %add.3 } -define i64 @sext_acc_4(i16* %a, i16* %b, i32 %acc) { +define i64 @sext_acc_4(ptr %a, ptr %b, i32 %acc) { ; CHECK-LABEL: @sext_acc_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A:%.*]] to i32* -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[B:%.*]] to i32* -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 2 -; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, i16* [[A]], i32 2 -; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, i16* [[B]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ADDR_A_2]] to i32* -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ADDR_B_2]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2 +; CHECK-NEXT: [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2 ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]]) ; CHECK-NEXT: ret i64 [[TMP10]] ; entry: - %ld.a.0 = load i16, i16* %a + %ld.a.0 = load i16, ptr %a %sext.a.0 = sext i16 %ld.a.0 to i32 - %ld.b.0 = load i16, i16* %b + %ld.b.0 = load i16, ptr %b %sext.b.0 = sext i16 %ld.b.0 to i32 %mul.0 = mul i32 %sext.a.0, %sext.b.0 - %addr.a.1 = getelementptr i16, i16* %a, i32 1 - %addr.b.1 = getelementptr i16, i16* %b, i32 1 - %ld.a.1 = load i16, i16* %addr.a.1 + %addr.a.1 = getelementptr i16, ptr %a, i32 1 + %addr.b.1 = getelementptr i16, ptr %b, i32 1 + %ld.a.1 = load i16, ptr %addr.a.1 %sext.a.1 = sext i16 %ld.a.1 to i32 - %ld.b.1 = load i16, i16* %addr.b.1 + %ld.b.1 = load i16, ptr %addr.b.1 %sext.b.1 = sext i16 %ld.b.1 to i32 %mul.1 = mul i32 %sext.a.1, %sext.b.1 %add = add i32 %mul.0, %mul.1 %sext.add = sext i32 %add to i64 - %addr.a.2 = getelementptr i16, i16* %a, i32 2 - %addr.b.2 = getelementptr i16, i16* %b, i32 2 - %ld.a.2 = load i16, i16* %addr.a.2 + %addr.a.2 = getelementptr i16, ptr %a, i32 2 + %addr.b.2 = getelementptr i16, ptr %b, i32 2 + %ld.a.2 = load i16, ptr %addr.a.2 %sext.a.2 = sext i16 %ld.a.2 to i32 - %ld.b.2 = load i16, i16* %addr.b.2 + %ld.b.2 = load i16, ptr %addr.b.2 %sext.b.2 = sext i16 %ld.b.2 to i32 %mul.2 = mul i32 %sext.a.2, %sext.b.2 %sext.mul.2 = sext i32 %mul.2 to i64 - %addr.a.3 = getelementptr i16, i16* %a, i32 3 - %addr.b.3 = getelementptr i16, i16* %b, i32 3 - %ld.a.3 = load i16, i16* %addr.a.3 + %addr.a.3 = getelementptr i16, ptr %a, i32 3 + %addr.b.3 = getelementptr i16, ptr %b, i32 3 + %ld.a.3 = load i16, ptr %addr.a.3 %sext.a.3 = sext i16 %ld.a.3 to i32 - %ld.b.3 = load i16, i16* %addr.b.3 + %ld.b.3 = load i16, ptr %addr.b.3 %sext.b.3 = sext i16 %ld.b.3 to i32 %mul.3 = mul i32 %sext.a.3, %sext.b.3 %sext.mul.3 = sext i32 %mul.3 to i64 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll index 
397ca7f..55328ab 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll @@ -3,21 +3,17 @@ ; RUN: opt -mtriple=arm-none-none-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; RUN: opt -mtriple=armeb-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED -define i32 @smladx(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, i32 %j, i32 %limit) { +define i32 @smladx(ptr nocapture readonly %pIn1, ptr nocapture readonly %pIn2, i32 %j, i32 %limit) { ; CHECK-LABEL: smladx ; CHECK: = phi i32 [ 0, %for.body.preheader.new ], ; CHECK: [[ACC0:%[^ ]+]] = phi i32 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* -; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 -; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* -; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 +; CHECK: [[IN21:%[^ ]+]] = load i32, ptr %pIn2.1, align 2 +; CHECK: [[IN10:%[^ ]+]] = load i32, ptr %pIn1.0, align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[IN21]], i32 [[IN10]], i32 [[ACC0]]) -; CHECK: [[PIN23:%[^ ]+]] = bitcast i16* %pIn2.3 to i32* -; CHECK: [[IN23:%[^ ]+]] = load i32, i32* [[PIN23]], align 2 -; CHECK: [[PIN12:%[^ ]+]] = bitcast i16* %pIn1.2 to i32* -; CHECK: [[IN12:%[^ ]+]] = load i32, i32* [[PIN12]], align 2 +; CHECK: [[IN23:%[^ ]+]] = load i32, ptr %pIn2.3, align 2 +; CHECK: [[IN12:%[^ ]+]] = load i32, ptr %pIn1.2, align 2 ; CHECK: [[ACC2]] = call i32 @llvm.arm.smladx(i32 [[IN23]], i32 [[IN12]], i32 [[ACC1]]) ; CHECK-NOT: call i32 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i32 @llvm.arm.smlad @@ -48,11 +44,11 @@ for.body.epil: %sum.010.epil = phi i32 [ %add.epil, %for.body.epil ], [ %sum.010.unr, %for.cond.cleanup.loopexit.unr-lcssa ] %epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ] %sub.epil = sub i32 %j, %i.011.epil - %arrayidx.epil = getelementptr inbounds i16, i16* %pIn2, i32 %sub.epil - %2 = load i16, i16* %arrayidx.epil, align 2 + %arrayidx.epil = getelementptr inbounds i16, ptr %pIn2, i32 %sub.epil + %2 = load i16, ptr %arrayidx.epil, align 2 %conv.epil = sext i16 %2 to i32 - %arrayidx1.epil = getelementptr inbounds i16, i16* %pIn1, i32 %i.011.epil - %3 = load i16, i16* %arrayidx1.epil, align 2 + %arrayidx1.epil = getelementptr inbounds i16, ptr %pIn1, i32 %i.011.epil + %3 = load i16, ptr %arrayidx1.epil, align 2 %conv2.epil = sext i16 %3 to i32 %mul.epil = mul nsw i32 %conv2.epil, %conv.epil %add.epil = add nsw i32 %mul.epil, %sum.010.epil @@ -69,26 +65,25 @@ for.body: %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %sum.010 = phi i32 [ 0, %for.body.preheader.new ], [ %add.3, %for.body ] %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] - %pIn2Base = phi i16* [ %pIn2, %for.body.preheader.new ], [ %pIn2.4, %for.body ] - %pIn2.0 = getelementptr inbounds i16, i16* %pIn2Base, i32 0 - %In2 = load i16, i16* %pIn2.0, align 2 - %pIn1.0 = getelementptr inbounds i16, i16* %pIn1, i32 %i.011 - %In1 = load i16, i16* %pIn1.0, align 2 + %pIn2Base = phi ptr [ %pIn2, %for.body.preheader.new ], [ %pIn2.4, %for.body ] + %In2 = load i16, ptr %pIn2Base, align 2 + %pIn1.0 = getelementptr inbounds i16, ptr %pIn1, i32 %i.011 + %In1 = load i16, ptr %pIn1.0, align 2 %inc = or i32 %i.011, 1 - %pIn2.1 = getelementptr inbounds i16, 
i16* %pIn2Base, i32 -1 - %In2.1 = load i16, i16* %pIn2.1, align 2 - %pIn1.1 = getelementptr inbounds i16, i16* %pIn1, i32 %inc - %In1.1 = load i16, i16* %pIn1.1, align 2 + %pIn2.1 = getelementptr inbounds i16, ptr %pIn2Base, i32 -1 + %In2.1 = load i16, ptr %pIn2.1, align 2 + %pIn1.1 = getelementptr inbounds i16, ptr %pIn1, i32 %inc + %In1.1 = load i16, ptr %pIn1.1, align 2 %inc.1 = or i32 %i.011, 2 - %pIn2.2 = getelementptr inbounds i16, i16* %pIn2Base, i32 -2 - %In2.2 = load i16, i16* %pIn2.2, align 2 - %pIn1.2 = getelementptr inbounds i16, i16* %pIn1, i32 %inc.1 - %In1.2 = load i16, i16* %pIn1.2, align 2 + %pIn2.2 = getelementptr inbounds i16, ptr %pIn2Base, i32 -2 + %In2.2 = load i16, ptr %pIn2.2, align 2 + %pIn1.2 = getelementptr inbounds i16, ptr %pIn1, i32 %inc.1 + %In1.2 = load i16, ptr %pIn1.2, align 2 %inc.2 = or i32 %i.011, 3 - %pIn2.3 = getelementptr inbounds i16, i16* %pIn2Base, i32 -3 - %In2.3 = load i16, i16* %pIn2.3, align 2 - %pIn1.3 = getelementptr inbounds i16, i16* %pIn1, i32 %inc.2 - %In1.3 = load i16, i16* %pIn1.3, align 2 + %pIn2.3 = getelementptr inbounds i16, ptr %pIn2Base, i32 -3 + %In2.3 = load i16, ptr %pIn2.3, align 2 + %pIn1.3 = getelementptr inbounds i16, ptr %pIn1, i32 %inc.2 + %In1.3 = load i16, ptr %pIn1.3, align 2 %sextIn1 = sext i16 %In1 to i32 %sextIn1.1 = sext i16 %In1.1 to i32 %sextIn1.2 = sext i16 %In1.2 to i32 @@ -106,44 +101,40 @@ for.body: %add.2 = add nsw i32 %mul.2, %add.1 %add.3 = add nsw i32 %mul.3, %add.2 %inc.3 = add i32 %i.011, 4 - %pIn2.4 = getelementptr inbounds i16, i16* %pIn2Base, i32 -4 + %pIn2.4 = getelementptr inbounds i16, ptr %pIn2Base, i32 -4 %niter.nsub.3 = add i32 %niter, -4 %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } -define i32 @smladx_swap(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, i32 %j, i32 %limit) { +define i32 @smladx_swap(ptr nocapture readonly %pIn1, ptr nocapture readonly %pIn2, i32 %j, i32 %limit) { ; CHECK-LABEL: smladx_swap ; CHECK: for.body.preheader.new: -; CHECK: [[PIN1Base:[^ ]+]] = getelementptr i16, i16* %pIn1 -; CHECK: [[PIN2Base:[^ ]+]] = getelementptr i16, i16* %pIn2 +; CHECK: [[PIN1Base:[^ ]+]] = getelementptr i16, ptr %pIn1 +; CHECK: [[PIN2Base:[^ ]+]] = getelementptr i16, ptr %pIn2 ; CHECK: for.body: -; CHECK: [[PIN2:%[^ ]+]] = phi i16* [ [[PIN2_NEXT:%[^ ]+]], %for.body ], [ [[PIN2Base]], %for.body.preheader.new ] -; CHECK: [[PIN1:%[^ ]+]] = phi i16* [ [[PIN1_NEXT:%[^ ]+]], %for.body ], [ [[PIN1Base]], %for.body.preheader.new ] +; CHECK: [[PIN2:%[^ ]+]] = phi ptr [ [[PIN2_NEXT:%[^ ]+]], %for.body ], [ [[PIN2Base]], %for.body.preheader.new ] +; CHECK: [[PIN1:%[^ ]+]] = phi ptr [ [[PIN1_NEXT:%[^ ]+]], %for.body ], [ [[PIN1Base]], %for.body.preheader.new ] ; CHECK: [[IV:%[^ ]+]] = phi i32 ; CHECK: [[ACC0:%[^ ]+]] = phi i32 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* -; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 +; CHECK: [[IN2:%[^ ]+]] = load i32, ptr [[PIN2]], align 2 -; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 -; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* -; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 +; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, ptr [[PIN1]], i32 -2 +; CHECK: [[IN1_2:%[^ ]+]] = load i32, ptr [[PIN1_2]], align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[IN2]], i32 [[IN1_2]], i32 [[ACC0]]) -; CHECK: 
[[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 -; CHECK: [[PIN2_2_CAST:%[^ ]+]] = bitcast i16* [[PIN2_2]] to i32* -; CHECK: [[IN2_2:%[^ ]+]] = load i32, i32* [[PIN2_2_CAST]], align 2 +; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, ptr [[PIN2]], i32 -2 +; CHECK: [[IN2_2:%[^ ]+]] = load i32, ptr [[PIN2_2]], align 2 -; CHECK: [[PIN1_CAST:%[^ ]+]] = bitcast i16* [[PIN1]] to i32* -; CHECK: [[IN1:%[^ ]+]] = load i32, i32* [[PIN1_CAST]], align 2 +; CHECK: [[IN1:%[^ ]+]] = load i32, ptr [[PIN1]], align 2 ; CHECK: [[ACC2]] = call i32 @llvm.arm.smladx(i32 [[IN2_2]], i32 [[IN1]], i32 [[ACC1]]) -; CHECK: [[PIN1_NEXT]] = getelementptr i16, i16* [[PIN1]], i32 4 -; CHECK: [[PIN2_NEXT]] = getelementptr i16, i16* [[PIN2]], i32 -4 +; CHECK: [[PIN1_NEXT]] = getelementptr i16, ptr [[PIN1]], i32 4 +; CHECK: [[PIN2_NEXT]] = getelementptr i16, ptr [[PIN2]], i32 -4 ; CHECK-NOT: call i32 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i32 @llvm.arm.smlad @@ -160,9 +151,9 @@ for.body.preheader: for.body.preheader.new: %unroll_iter = sub i32 %limit, %xtraiter - %scevgep6 = getelementptr i16, i16* %pIn1, i32 2 + %scevgep6 = getelementptr i16, ptr %pIn1, i32 2 %2 = add i32 %j, -1 - %scevgep11 = getelementptr i16, i16* %pIn2, i32 %2 + %scevgep11 = getelementptr i16, ptr %pIn2, i32 %2 br label %for.body for.cond.cleanup.loopexit.unr-lcssa: @@ -173,25 +164,25 @@ for.cond.cleanup.loopexit.unr-lcssa: br i1 %lcmp.mod, label %for.cond.cleanup, label %for.body.epil.preheader for.body.epil.preheader: - %scevgep = getelementptr i16, i16* %pIn1, i32 %i.011.unr + %scevgep = getelementptr i16, ptr %pIn1, i32 %i.011.unr %3 = sub i32 %j, %i.011.unr - %scevgep2 = getelementptr i16, i16* %pIn2, i32 %3 + %scevgep2 = getelementptr i16, ptr %pIn2, i32 %3 %4 = sub i32 0, %xtraiter br label %for.body.epil for.body.epil: %lsr.iv5 = phi i32 [ %4, %for.body.epil.preheader ], [ %lsr.iv.next, %for.body.epil ] - %lsr.iv3 = phi i16* [ %scevgep2, %for.body.epil.preheader ], [ %scevgep4, %for.body.epil ] - %lsr.iv = phi i16* [ %scevgep, %for.body.epil.preheader ], [ %scevgep1, %for.body.epil ] + %lsr.iv3 = phi ptr [ %scevgep2, %for.body.epil.preheader ], [ %scevgep4, %for.body.epil ] + %lsr.iv = phi ptr [ %scevgep, %for.body.epil.preheader ], [ %scevgep1, %for.body.epil ] %sum.010.epil = phi i32 [ %add.epil, %for.body.epil ], [ %sum.010.unr, %for.body.epil.preheader ] - %5 = load i16, i16* %lsr.iv3, align 2 + %5 = load i16, ptr %lsr.iv3, align 2 %conv.epil = sext i16 %5 to i32 - %6 = load i16, i16* %lsr.iv, align 2 + %6 = load i16, ptr %lsr.iv, align 2 %conv2.epil = sext i16 %6 to i32 %mul.epil = mul nsw i32 %conv2.epil, %conv.epil %add.epil = add nsw i32 %mul.epil, %sum.010.epil - %scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1 - %scevgep4 = getelementptr i16, i16* %lsr.iv3, i32 -1 + %scevgep1 = getelementptr i16, ptr %lsr.iv, i32 1 + %scevgep4 = getelementptr i16, ptr %lsr.iv3, i32 -1 %lsr.iv.next = add nsw i32 %lsr.iv5, 1 %epil.iter.cmp = icmp eq i32 %lsr.iv.next, 0 br i1 %epil.iter.cmp, label %for.cond.cleanup, label %for.body.epil @@ -201,24 +192,24 @@ for.cond.cleanup: ret i32 %sum.0.lcssa for.body: - %pin2 = phi i16* [ %pin2_sub4, %for.body ], [ %scevgep11, %for.body.preheader.new ] - %pin1 = phi i16* [ %pin1_add4, %for.body ], [ %scevgep6, %for.body.preheader.new ] + %pin2 = phi ptr [ %pin2_sub4, %for.body ], [ %scevgep11, %for.body.preheader.new ] + %pin1 = phi ptr [ %pin1_add4, %for.body ], [ %scevgep6, %for.body.preheader.new ] %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %sum.010 = phi i32 [ 0, 
%for.body.preheader.new ], [ %add.3, %for.body ] - %pin2_add1 = getelementptr i16, i16* %pin2, i32 1 - %In2 = load i16, i16* %pin2_add1, align 2 - %pin1_sub2 = getelementptr i16, i16* %pin1, i32 -2 - %In1 = load i16, i16* %pin1_sub2, align 2 - %In2.1 = load i16, i16* %pin2, align 2 - %pin1_sub1 = getelementptr i16, i16* %pin1, i32 -1 - %In1.1 = load i16, i16* %pin1_sub1, align 2 - %pin2_sub1 = getelementptr i16, i16* %pin2, i32 -1 - %In2.2 = load i16, i16* %pin2_sub1, align 2 - %In1.2 = load i16, i16* %pin1, align 2 - %pin2_sub2 = getelementptr i16, i16* %pin2, i32 -2 - %In2.3 = load i16, i16* %pin2_sub2, align 2 - %pin1_add1 = getelementptr i16, i16* %pin1, i32 1 - %In1.3 = load i16, i16* %pin1_add1, align 2 + %pin2_add1 = getelementptr i16, ptr %pin2, i32 1 + %In2 = load i16, ptr %pin2_add1, align 2 + %pin1_sub2 = getelementptr i16, ptr %pin1, i32 -2 + %In1 = load i16, ptr %pin1_sub2, align 2 + %In2.1 = load i16, ptr %pin2, align 2 + %pin1_sub1 = getelementptr i16, ptr %pin1, i32 -1 + %In1.1 = load i16, ptr %pin1_sub1, align 2 + %pin2_sub1 = getelementptr i16, ptr %pin2, i32 -1 + %In2.2 = load i16, ptr %pin2_sub1, align 2 + %In1.2 = load i16, ptr %pin1, align 2 + %pin2_sub2 = getelementptr i16, ptr %pin2, i32 -2 + %In2.3 = load i16, ptr %pin2_sub2, align 2 + %pin1_add1 = getelementptr i16, ptr %pin1, i32 1 + %In1.3 = load i16, ptr %pin1_add1, align 2 %sextIn2 = sext i16 %In2 to i32 %sextIn1 = sext i16 %In1 to i32 %sextIn2.1 = sext i16 %In2.1 to i32 @@ -236,8 +227,8 @@ for.body: %mul.3 = mul nsw i32 %sextIn2.3, %sextIn1.3 %add.3 = add nsw i32 %mul.3, %add.2 %inc.3 = add i32 %i.011, 4 - %pin1_add4 = getelementptr i16, i16* %pin1, i32 4 - %pin2_sub4 = getelementptr i16, i16* %pin2, i32 -4 + %pin1_add4 = getelementptr i16, ptr %pin1, i32 4 + %pin2_sub4 = getelementptr i16, ptr %pin2, i32 -4 %niter.ncmp.3 = icmp eq i32 %unroll_iter, %inc.3 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll index 76f5533..a24fbfd 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll @@ -2,20 +2,16 @@ ; RUN: opt -mtriple=arm-none-none-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; RUN: opt -mtriple=arm-none-none-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED -define i64 @smlaldx(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, i32 %j, i32 %limit) { +define i64 @smlaldx(ptr nocapture readonly %pIn1, ptr nocapture readonly %pIn2, i32 %j, i32 %limit) { ; CHECK-LABEL: smlaldx ; CHECK: = phi i32 [ 0, %for.body.preheader.new ], ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* -; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 -; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* -; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 +; CHECK: [[IN21:%[^ ]+]] = load i32, ptr %pIn2.1, align 2 +; CHECK: [[IN10:%[^ ]+]] = load i32, ptr %pIn1.0, align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN21]], i32 [[IN10]], i64 [[ACC0]]) -; CHECK: [[PIN23:%[^ ]+]] = bitcast i16* %pIn2.3 to i32* -; CHECK: [[IN23:%[^ ]+]] = load i32, i32* [[PIN23]], align 2 -; CHECK: [[PIN12:%[^ ]+]] = bitcast i16* %pIn1.2 to i32* -; CHECK: [[IN12:%[^ ]+]] = load i32, i32* [[PIN12]], 
align 2 +; CHECK: [[IN23:%[^ ]+]] = load i32, ptr %pIn2.3, align 2 +; CHECK: [[IN12:%[^ ]+]] = load i32, ptr %pIn1.2, align 2 ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN23]], i32 [[IN12]], i64 [[ACC1]]) ; CHECK-NOT: call i64 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i64 @llvm.arm.smlad @@ -46,11 +42,11 @@ for.body.epil: %sum.010.epil = phi i64 [ %add.epil, %for.body.epil ], [ %sum.010.unr, %for.cond.cleanup.loopexit.unr-lcssa ] %epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ] %sub.epil = sub i32 %j, %i.011.epil - %arrayidx.epil = getelementptr inbounds i16, i16* %pIn2, i32 %sub.epil - %2 = load i16, i16* %arrayidx.epil, align 2 + %arrayidx.epil = getelementptr inbounds i16, ptr %pIn2, i32 %sub.epil + %2 = load i16, ptr %arrayidx.epil, align 2 %conv.epil = sext i16 %2 to i32 - %arrayidx1.epil = getelementptr inbounds i16, i16* %pIn1, i32 %i.011.epil - %3 = load i16, i16* %arrayidx1.epil, align 2 + %arrayidx1.epil = getelementptr inbounds i16, ptr %pIn1, i32 %i.011.epil + %3 = load i16, ptr %arrayidx1.epil, align 2 %conv2.epil = sext i16 %3 to i32 %mul.epil = mul nsw i32 %conv2.epil, %conv.epil %sext.mul.epil = sext i32 %mul.epil to i64 @@ -68,26 +64,25 @@ for.body: %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %sum.010 = phi i64 [ 0, %for.body.preheader.new ], [ %add.3, %for.body ] %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] - %pIn2Base = phi i16* [ %pIn2, %for.body.preheader.new ], [ %pIn2.4, %for.body ] - %pIn2.0 = getelementptr inbounds i16, i16* %pIn2Base, i32 0 - %In2 = load i16, i16* %pIn2.0, align 2 - %pIn1.0 = getelementptr inbounds i16, i16* %pIn1, i32 %i.011 - %In1 = load i16, i16* %pIn1.0, align 2 + %pIn2Base = phi ptr [ %pIn2, %for.body.preheader.new ], [ %pIn2.4, %for.body ] + %In2 = load i16, ptr %pIn2Base, align 2 + %pIn1.0 = getelementptr inbounds i16, ptr %pIn1, i32 %i.011 + %In1 = load i16, ptr %pIn1.0, align 2 %inc = or i32 %i.011, 1 - %pIn2.1 = getelementptr inbounds i16, i16* %pIn2Base, i32 -1 - %In2.1 = load i16, i16* %pIn2.1, align 2 - %pIn1.1 = getelementptr inbounds i16, i16* %pIn1, i32 %inc - %In1.1 = load i16, i16* %pIn1.1, align 2 + %pIn2.1 = getelementptr inbounds i16, ptr %pIn2Base, i32 -1 + %In2.1 = load i16, ptr %pIn2.1, align 2 + %pIn1.1 = getelementptr inbounds i16, ptr %pIn1, i32 %inc + %In1.1 = load i16, ptr %pIn1.1, align 2 %inc.1 = or i32 %i.011, 2 - %pIn2.2 = getelementptr inbounds i16, i16* %pIn2Base, i32 -2 - %In2.2 = load i16, i16* %pIn2.2, align 2 - %pIn1.2 = getelementptr inbounds i16, i16* %pIn1, i32 %inc.1 - %In1.2 = load i16, i16* %pIn1.2, align 2 + %pIn2.2 = getelementptr inbounds i16, ptr %pIn2Base, i32 -2 + %In2.2 = load i16, ptr %pIn2.2, align 2 + %pIn1.2 = getelementptr inbounds i16, ptr %pIn1, i32 %inc.1 + %In1.2 = load i16, ptr %pIn1.2, align 2 %inc.2 = or i32 %i.011, 3 - %pIn2.3 = getelementptr inbounds i16, i16* %pIn2Base, i32 -3 - %In2.3 = load i16, i16* %pIn2.3, align 2 - %pIn1.3 = getelementptr inbounds i16, i16* %pIn1, i32 %inc.2 - %In1.3 = load i16, i16* %pIn1.3, align 2 + %pIn2.3 = getelementptr inbounds i16, ptr %pIn2Base, i32 -3 + %In2.3 = load i16, ptr %pIn2.3, align 2 + %pIn1.3 = getelementptr inbounds i16, ptr %pIn1, i32 %inc.2 + %In1.3 = load i16, ptr %pIn1.3, align 2 %sextIn1 = sext i16 %In1 to i32 %sextIn1.1 = sext i16 %In1.1 to i32 %sextIn1.2 = sext i16 %In1.2 to i32 @@ -109,13 +104,13 @@ for.body: %add.2 = add nsw i64 %sext.mul.2, %add.1 %add.3 = add nsw i64 %sext.mul.3, 
%add.2 %inc.3 = add i32 %i.011, 4 - %pIn2.4 = getelementptr inbounds i16, i16* %pIn2Base, i32 -4 + %pIn2.4 = getelementptr inbounds i16, ptr %pIn2Base, i32 -4 %niter.nsub.3 = add i32 %niter, -4 %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } -define i64 @smlaldx_swap(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, i32 %j, i32 %limit) { +define i64 @smlaldx_swap(ptr nocapture readonly %pIn1, ptr nocapture readonly %pIn2, i32 %j, i32 %limit) { entry: %cmp9 = icmp eq i32 %limit, 0 @@ -129,9 +124,9 @@ for.body.preheader: for.body.preheader.new: %unroll_iter = sub i32 %limit, %xtraiter - %scevgep6 = getelementptr i16, i16* %pIn1, i32 2 + %scevgep6 = getelementptr i16, ptr %pIn1, i32 2 %2 = add i32 %j, -1 - %scevgep11 = getelementptr i16, i16* %pIn2, i32 %2 + %scevgep11 = getelementptr i16, ptr %pIn2, i32 %2 br label %for.body for.cond.cleanup.loopexit.unr-lcssa: @@ -142,26 +137,26 @@ for.cond.cleanup.loopexit.unr-lcssa: br i1 %lcmp.mod, label %for.cond.cleanup, label %for.body.epil.preheader for.body.epil.preheader: - %scevgep = getelementptr i16, i16* %pIn1, i32 %i.011.unr + %scevgep = getelementptr i16, ptr %pIn1, i32 %i.011.unr %3 = sub i32 %j, %i.011.unr - %scevgep2 = getelementptr i16, i16* %pIn2, i32 %3 + %scevgep2 = getelementptr i16, ptr %pIn2, i32 %3 %4 = sub i32 0, %xtraiter br label %for.body.epil for.body.epil: %lsr.iv5 = phi i32 [ %4, %for.body.epil.preheader ], [ %lsr.iv.next, %for.body.epil ] - %lsr.iv3 = phi i16* [ %scevgep2, %for.body.epil.preheader ], [ %scevgep4, %for.body.epil ] - %lsr.iv = phi i16* [ %scevgep, %for.body.epil.preheader ], [ %scevgep1, %for.body.epil ] + %lsr.iv3 = phi ptr [ %scevgep2, %for.body.epil.preheader ], [ %scevgep4, %for.body.epil ] + %lsr.iv = phi ptr [ %scevgep, %for.body.epil.preheader ], [ %scevgep1, %for.body.epil ] %sum.010.epil = phi i64 [ %add.epil, %for.body.epil ], [ %sum.010.unr, %for.body.epil.preheader ] - %5 = load i16, i16* %lsr.iv3, align 2 + %5 = load i16, ptr %lsr.iv3, align 2 %conv.epil = sext i16 %5 to i32 - %6 = load i16, i16* %lsr.iv, align 2 + %6 = load i16, ptr %lsr.iv, align 2 %conv2.epil = sext i16 %6 to i32 %mul.epil = mul nsw i32 %conv2.epil, %conv.epil %sext.mul.epil = sext i32 %mul.epil to i64 %add.epil = add nsw i64 %sext.mul.epil, %sum.010.epil - %scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1 - %scevgep4 = getelementptr i16, i16* %lsr.iv3, i32 -1 + %scevgep1 = getelementptr i16, ptr %lsr.iv, i32 1 + %scevgep4 = getelementptr i16, ptr %lsr.iv3, i32 -1 %lsr.iv.next = add nsw i32 %lsr.iv5, 1 %epil.iter.cmp = icmp eq i32 %lsr.iv.next, 0 br i1 %epil.iter.cmp, label %for.cond.cleanup, label %for.body.epil @@ -172,56 +167,52 @@ for.cond.cleanup: ; CHECK-LABEL: smlaldx_swap ; CHECK: for.body.preheader.new: -; CHECK: [[PIN1Base:[^ ]+]] = getelementptr i16, i16* %pIn1 -; CHECK: [[PIN2Base:[^ ]+]] = getelementptr i16, i16* %pIn2 +; CHECK: [[PIN1Base:[^ ]+]] = getelementptr i16, ptr %pIn1 +; CHECK: [[PIN2Base:[^ ]+]] = getelementptr i16, ptr %pIn2 ; CHECK: for.body: -; CHECK: [[PIN2:%[^ ]+]] = phi i16* [ [[PIN2_NEXT:%[^ ]+]], %for.body ], [ [[PIN2Base]], %for.body.preheader.new ] -; CHECK: [[PIN1:%[^ ]+]] = phi i16* [ [[PIN1_NEXT:%[^ ]+]], %for.body ], [ [[PIN1Base]], %for.body.preheader.new ] +; CHECK: [[PIN2:%[^ ]+]] = phi ptr [ [[PIN2_NEXT:%[^ ]+]], %for.body ], [ [[PIN2Base]], %for.body.preheader.new ] +; CHECK: [[PIN1:%[^ ]+]] = phi ptr [ [[PIN1_NEXT:%[^ ]+]], %for.body ], [ [[PIN1Base]], %for.body.preheader.new ] ; CHECK: 
[[IV:%[^ ]+]] = phi i32 ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* -; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 +; CHECK: [[IN2:%[^ ]+]] = load i32, ptr [[PIN2]], align 2 -; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 -; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* -; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 +; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, ptr [[PIN1]], i32 -2 +; CHECK: [[IN1_2:%[^ ]+]] = load i32, ptr [[PIN1_2]], align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN2]], i32 [[IN1_2]], i64 [[ACC0]]) -; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 -; CHECK: [[PIN2_2_CAST:%[^ ]+]] = bitcast i16* [[PIN2_2]] to i32* -; CHECK: [[IN2_2:%[^ ]+]] = load i32, i32* [[PIN2_2_CAST]], align 2 +; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, ptr [[PIN2]], i32 -2 +; CHECK: [[IN2_2:%[^ ]+]] = load i32, ptr [[PIN2_2]], align 2 -; CHECK: [[PIN1_CAST:%[^ ]+]] = bitcast i16* [[PIN1]] to i32* -; CHECK: [[IN1:%[^ ]+]] = load i32, i32* [[PIN1_CAST]], align 2 +; CHECK: [[IN1:%[^ ]+]] = load i32, ptr [[PIN1]], align 2 ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN2_2]], i32 [[IN1]], i64 [[ACC1]]) -; CHECK: [[PIN1_NEXT]] = getelementptr i16, i16* [[PIN1]], i32 4 -; CHECK: [[PIN2_NEXT]] = getelementptr i16, i16* [[PIN2]], i32 -4 +; CHECK: [[PIN1_NEXT]] = getelementptr i16, ptr [[PIN1]], i32 4 +; CHECK: [[PIN2_NEXT]] = getelementptr i16, ptr [[PIN2]], i32 -4 ; CHECK-NOT: call i64 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i64 @llvm.arm.smlad for.body: - %pin2 = phi i16* [ %pin2.sub4, %for.body ], [ %scevgep11, %for.body.preheader.new ] - %pin1 = phi i16* [ %pin1.add4, %for.body ], [ %scevgep6, %for.body.preheader.new ] + %pin2 = phi ptr [ %pin2.sub4, %for.body ], [ %scevgep11, %for.body.preheader.new ] + %pin1 = phi ptr [ %pin1.add4, %for.body ], [ %scevgep6, %for.body.preheader.new ] %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %sum.010 = phi i64 [ 0, %for.body.preheader.new ], [ %add.3, %for.body ] - %pin2.add1 = getelementptr i16, i16* %pin2, i32 1 - %In2 = load i16, i16* %pin2.add1, align 2 - %pin1.sub2 = getelementptr i16, i16* %pin1, i32 -2 - %In1 = load i16, i16* %pin1.sub2, align 2 - %In2.1 = load i16, i16* %pin2, align 2 - %pin1.sub1 = getelementptr i16, i16* %pin1, i32 -1 - %In1.1 = load i16, i16* %pin1.sub1, align 2 - %pin2.sub1 = getelementptr i16, i16* %pin2, i32 -1 - %In2.2 = load i16, i16* %pin2.sub1, align 2 - %In1.2 = load i16, i16* %pin1, align 2 - %pin2.sub2 = getelementptr i16, i16* %pin2, i32 -2 - %In2.3 = load i16, i16* %pin2.sub2, align 2 - %pin1.add1 = getelementptr i16, i16* %pin1, i32 1 - %In1.3 = load i16, i16* %pin1.add1, align 2 + %pin2.add1 = getelementptr i16, ptr %pin2, i32 1 + %In2 = load i16, ptr %pin2.add1, align 2 + %pin1.sub2 = getelementptr i16, ptr %pin1, i32 -2 + %In1 = load i16, ptr %pin1.sub2, align 2 + %In2.1 = load i16, ptr %pin2, align 2 + %pin1.sub1 = getelementptr i16, ptr %pin1, i32 -1 + %In1.1 = load i16, ptr %pin1.sub1, align 2 + %pin2.sub1 = getelementptr i16, ptr %pin2, i32 -1 + %In2.2 = load i16, ptr %pin2.sub1, align 2 + %In1.2 = load i16, ptr %pin1, align 2 + %pin2.sub2 = getelementptr i16, ptr %pin2, i32 -2 + %In2.3 = load i16, ptr %pin2.sub2, align 2 + %pin1.add1 = getelementptr i16, ptr %pin1, i32 1 + %In1.3 = load i16, ptr %pin1.add1, align 2 %sextIn2 = sext i16 
%In2 to i32 %sextIn1 = sext i16 %In1 to i32 %sextIn2.1 = sext i16 %In2.1 to i32 @@ -243,8 +234,8 @@ for.body: %sext.mul.3 = sext i32 %mul.3 to i64 %add.3 = add nsw i64 %sext.mul.3, %add.2 %inc.3 = add i32 %i.011, 4 - %pin1.add4 = getelementptr i16, i16* %pin1, i32 4 - %pin2.sub4 = getelementptr i16, i16* %pin2, i32 -4 + %pin1.add4 = getelementptr i16, ptr %pin1, i32 4 + %pin2.sub4 = getelementptr i16, ptr %pin2, i32 -4 %niter.ncmp.3 = icmp eq i32 %unroll_iter, %inc.3 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll index 566e008..7158846 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll @@ -2,20 +2,16 @@ ; RUN: opt -mtriple=arm-none-none-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; RUN: opt -mtriple=arm-none-none-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED -define i64 @smlaldx(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, i32 %j, i32 %limit) { +define i64 @smlaldx(ptr nocapture readonly %pIn1, ptr nocapture readonly %pIn2, i32 %j, i32 %limit) { ; CHECK-LABEL: smlaldx ; CHECK: = phi i32 [ 0, %for.body.preheader.new ], ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* -; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 -; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* -; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 +; CHECK: [[IN21:%[^ ]+]] = load i32, ptr %pIn2.1, align 2 +; CHECK: [[IN10:%[^ ]+]] = load i32, ptr %pIn1.0, align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN21]], i32 [[IN10]], i64 [[ACC0]]) -; CHECK: [[PIN23:%[^ ]+]] = bitcast i16* %pIn2.3 to i32* -; CHECK: [[IN23:%[^ ]+]] = load i32, i32* [[PIN23]], align 2 -; CHECK: [[PIN12:%[^ ]+]] = bitcast i16* %pIn1.2 to i32* -; CHECK: [[IN12:%[^ ]+]] = load i32, i32* [[PIN12]], align 2 +; CHECK: [[IN23:%[^ ]+]] = load i32, ptr %pIn2.3, align 2 +; CHECK: [[IN12:%[^ ]+]] = load i32, ptr %pIn1.2, align 2 ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN23]], i32 [[IN12]], i64 [[ACC1]]) ; CHECK-NOT: call i64 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i64 @llvm.arm.smlad @@ -46,11 +42,11 @@ for.body.epil: %sum.010.epil = phi i64 [ %add.epil, %for.body.epil ], [ %sum.010.unr, %for.cond.cleanup.loopexit.unr-lcssa ] %epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ] %sub.epil = sub i32 %j, %i.011.epil - %arrayidx.epil = getelementptr inbounds i16, i16* %pIn2, i32 %sub.epil - %2 = load i16, i16* %arrayidx.epil, align 2 + %arrayidx.epil = getelementptr inbounds i16, ptr %pIn2, i32 %sub.epil + %2 = load i16, ptr %arrayidx.epil, align 2 %conv.epil = sext i16 %2 to i32 - %arrayidx1.epil = getelementptr inbounds i16, i16* %pIn1, i32 %i.011.epil - %3 = load i16, i16* %arrayidx1.epil, align 2 + %arrayidx1.epil = getelementptr inbounds i16, ptr %pIn1, i32 %i.011.epil + %3 = load i16, ptr %arrayidx1.epil, align 2 %conv2.epil = sext i16 %3 to i32 %mul.epil = mul nsw i32 %conv2.epil, %conv.epil %sext.mul.epil = sext i32 %mul.epil to i64 @@ -68,26 +64,25 @@ for.body: %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %sum.010 = phi i64 [ 0, %for.body.preheader.new ], [ %add.3, 
%for.body ] %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] - %pIn2Base = phi i16* [ %pIn2, %for.body.preheader.new ], [ %pIn2.4, %for.body ] - %pIn2.0 = getelementptr inbounds i16, i16* %pIn2Base, i32 0 - %In2 = load i16, i16* %pIn2.0, align 2 - %pIn1.0 = getelementptr inbounds i16, i16* %pIn1, i32 %i.011 - %In1 = load i16, i16* %pIn1.0, align 2 + %pIn2Base = phi ptr [ %pIn2, %for.body.preheader.new ], [ %pIn2.4, %for.body ] + %In2 = load i16, ptr %pIn2Base, align 2 + %pIn1.0 = getelementptr inbounds i16, ptr %pIn1, i32 %i.011 + %In1 = load i16, ptr %pIn1.0, align 2 %inc = or i32 %i.011, 1 - %pIn2.1 = getelementptr inbounds i16, i16* %pIn2Base, i32 -1 - %In2.1 = load i16, i16* %pIn2.1, align 2 - %pIn1.1 = getelementptr inbounds i16, i16* %pIn1, i32 %inc - %In1.1 = load i16, i16* %pIn1.1, align 2 + %pIn2.1 = getelementptr inbounds i16, ptr %pIn2Base, i32 -1 + %In2.1 = load i16, ptr %pIn2.1, align 2 + %pIn1.1 = getelementptr inbounds i16, ptr %pIn1, i32 %inc + %In1.1 = load i16, ptr %pIn1.1, align 2 %inc.1 = or i32 %i.011, 2 - %pIn2.2 = getelementptr inbounds i16, i16* %pIn2Base, i32 -2 - %In2.2 = load i16, i16* %pIn2.2, align 2 - %pIn1.2 = getelementptr inbounds i16, i16* %pIn1, i32 %inc.1 - %In1.2 = load i16, i16* %pIn1.2, align 2 + %pIn2.2 = getelementptr inbounds i16, ptr %pIn2Base, i32 -2 + %In2.2 = load i16, ptr %pIn2.2, align 2 + %pIn1.2 = getelementptr inbounds i16, ptr %pIn1, i32 %inc.1 + %In1.2 = load i16, ptr %pIn1.2, align 2 %inc.2 = or i32 %i.011, 3 - %pIn2.3 = getelementptr inbounds i16, i16* %pIn2Base, i32 -3 - %In2.3 = load i16, i16* %pIn2.3, align 2 - %pIn1.3 = getelementptr inbounds i16, i16* %pIn1, i32 %inc.2 - %In1.3 = load i16, i16* %pIn1.3, align 2 + %pIn2.3 = getelementptr inbounds i16, ptr %pIn2Base, i32 -3 + %In2.3 = load i16, ptr %pIn2.3, align 2 + %pIn1.3 = getelementptr inbounds i16, ptr %pIn1, i32 %inc.2 + %In1.3 = load i16, ptr %pIn1.3, align 2 %sextIn1 = sext i16 %In1 to i32 %sextIn1.1 = sext i16 %In1.1 to i32 %sextIn1.2 = sext i16 %In1.2 to i32 @@ -109,13 +104,13 @@ for.body: %add.2 = add nsw i64 %add.1, %sext.mul.2 %add.3 = add nsw i64 %sext.mul.3, %add.2 %inc.3 = add i32 %i.011, 4 - %pIn2.4 = getelementptr inbounds i16, i16* %pIn2Base, i32 -4 + %pIn2.4 = getelementptr inbounds i16, ptr %pIn2Base, i32 -4 %niter.nsub.3 = add i32 %niter, -4 %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } -define i64 @smlaldx_swap(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, i32 %j, i32 %limit) { +define i64 @smlaldx_swap(ptr nocapture readonly %pIn1, ptr nocapture readonly %pIn2, i32 %j, i32 %limit) { entry: %cmp9 = icmp eq i32 %limit, 0 @@ -129,9 +124,9 @@ for.body.preheader: for.body.preheader.new: %unroll_iter = sub i32 %limit, %xtraiter - %scevgep6 = getelementptr i16, i16* %pIn1, i32 2 + %scevgep6 = getelementptr i16, ptr %pIn1, i32 2 %2 = add i32 %j, -1 - %scevgep11 = getelementptr i16, i16* %pIn2, i32 %2 + %scevgep11 = getelementptr i16, ptr %pIn2, i32 %2 br label %for.body for.cond.cleanup.loopexit.unr-lcssa: @@ -142,26 +137,26 @@ for.cond.cleanup.loopexit.unr-lcssa: br i1 %lcmp.mod, label %for.cond.cleanup, label %for.body.epil.preheader for.body.epil.preheader: - %scevgep = getelementptr i16, i16* %pIn1, i32 %i.011.unr + %scevgep = getelementptr i16, ptr %pIn1, i32 %i.011.unr %3 = sub i32 %j, %i.011.unr - %scevgep2 = getelementptr i16, i16* %pIn2, i32 %3 + %scevgep2 = getelementptr i16, ptr %pIn2, i32 %3 %4 = sub i32 0, %xtraiter 
br label %for.body.epil for.body.epil: %lsr.iv5 = phi i32 [ %4, %for.body.epil.preheader ], [ %lsr.iv.next, %for.body.epil ] - %lsr.iv3 = phi i16* [ %scevgep2, %for.body.epil.preheader ], [ %scevgep4, %for.body.epil ] - %lsr.iv = phi i16* [ %scevgep, %for.body.epil.preheader ], [ %scevgep1, %for.body.epil ] + %lsr.iv3 = phi ptr [ %scevgep2, %for.body.epil.preheader ], [ %scevgep4, %for.body.epil ] + %lsr.iv = phi ptr [ %scevgep, %for.body.epil.preheader ], [ %scevgep1, %for.body.epil ] %sum.010.epil = phi i64 [ %add.epil, %for.body.epil ], [ %sum.010.unr, %for.body.epil.preheader ] - %5 = load i16, i16* %lsr.iv3, align 2 + %5 = load i16, ptr %lsr.iv3, align 2 %conv.epil = sext i16 %5 to i32 - %6 = load i16, i16* %lsr.iv, align 2 + %6 = load i16, ptr %lsr.iv, align 2 %conv2.epil = sext i16 %6 to i32 %mul.epil = mul nsw i32 %conv2.epil, %conv.epil %sext.mul.epil = sext i32 %mul.epil to i64 %add.epil = add nsw i64 %sext.mul.epil, %sum.010.epil - %scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1 - %scevgep4 = getelementptr i16, i16* %lsr.iv3, i32 -1 + %scevgep1 = getelementptr i16, ptr %lsr.iv, i32 1 + %scevgep4 = getelementptr i16, ptr %lsr.iv3, i32 -1 %lsr.iv.next = add nsw i32 %lsr.iv5, 1 %epil.iter.cmp = icmp eq i32 %lsr.iv.next, 0 br i1 %epil.iter.cmp, label %for.cond.cleanup, label %for.body.epil @@ -172,56 +167,52 @@ for.cond.cleanup: ; CHECK-LABEL: smlaldx_swap ; CHECK: for.body.preheader.new: -; CHECK: [[PIN1Base:[^ ]+]] = getelementptr i16, i16* %pIn1 -; CHECK: [[PIN2Base:[^ ]+]] = getelementptr i16, i16* %pIn2 +; CHECK: [[PIN1Base:[^ ]+]] = getelementptr i16, ptr %pIn1 +; CHECK: [[PIN2Base:[^ ]+]] = getelementptr i16, ptr %pIn2 ; CHECK: for.body: -; CHECK: [[PIN2:%[^ ]+]] = phi i16* [ [[PIN2_NEXT:%[^ ]+]], %for.body ], [ [[PIN2Base]], %for.body.preheader.new ] -; CHECK: [[PIN1:%[^ ]+]] = phi i16* [ [[PIN1_NEXT:%[^ ]+]], %for.body ], [ [[PIN1Base]], %for.body.preheader.new ] +; CHECK: [[PIN2:%[^ ]+]] = phi ptr [ [[PIN2_NEXT:%[^ ]+]], %for.body ], [ [[PIN2Base]], %for.body.preheader.new ] +; CHECK: [[PIN1:%[^ ]+]] = phi ptr [ [[PIN1_NEXT:%[^ ]+]], %for.body ], [ [[PIN1Base]], %for.body.preheader.new ] ; CHECK: [[IV:%[^ ]+]] = phi i32 ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* -; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 +; CHECK: [[IN2:%[^ ]+]] = load i32, ptr [[PIN2]], align 2 -; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 -; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* -; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 +; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, ptr [[PIN1]], i32 -2 +; CHECK: [[IN1_2:%[^ ]+]] = load i32, ptr [[PIN1_2]], align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN2]], i32 [[IN1_2]], i64 [[ACC0]]) -; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 -; CHECK: [[PIN2_2_CAST:%[^ ]+]] = bitcast i16* [[PIN2_2]] to i32* -; CHECK: [[IN2_2:%[^ ]+]] = load i32, i32* [[PIN2_2_CAST]], align 2 +; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, ptr [[PIN2]], i32 -2 +; CHECK: [[IN2_2:%[^ ]+]] = load i32, ptr [[PIN2_2]], align 2 -; CHECK: [[PIN1_CAST:%[^ ]+]] = bitcast i16* [[PIN1]] to i32* -; CHECK: [[IN1:%[^ ]+]] = load i32, i32* [[PIN1_CAST]], align 2 +; CHECK: [[IN1:%[^ ]+]] = load i32, ptr [[PIN1]], align 2 ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN2_2]], i32 [[IN1]], i64 [[ACC1]]) -; CHECK: [[PIN1_NEXT]] = getelementptr 
i16, i16* [[PIN1]], i32 4 -; CHECK: [[PIN2_NEXT]] = getelementptr i16, i16* [[PIN2]], i32 -4 +; CHECK: [[PIN1_NEXT]] = getelementptr i16, ptr [[PIN1]], i32 4 +; CHECK: [[PIN2_NEXT]] = getelementptr i16, ptr [[PIN2]], i32 -4 ; CHECK-NOT: call i64 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i64 @llvm.arm.smlad for.body: - %pin2 = phi i16* [ %pin2.sub4, %for.body ], [ %scevgep11, %for.body.preheader.new ] - %pin1 = phi i16* [ %pin1.add4, %for.body ], [ %scevgep6, %for.body.preheader.new ] + %pin2 = phi ptr [ %pin2.sub4, %for.body ], [ %scevgep11, %for.body.preheader.new ] + %pin1 = phi ptr [ %pin1.add4, %for.body ], [ %scevgep6, %for.body.preheader.new ] %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %sum.010 = phi i64 [ 0, %for.body.preheader.new ], [ %add.3, %for.body ] - %pin2.add1 = getelementptr i16, i16* %pin2, i32 1 - %In2 = load i16, i16* %pin2.add1, align 2 - %pin1.sub2 = getelementptr i16, i16* %pin1, i32 -2 - %In1 = load i16, i16* %pin1.sub2, align 2 - %In2.1 = load i16, i16* %pin2, align 2 - %pin1.sub1 = getelementptr i16, i16* %pin1, i32 -1 - %In1.1 = load i16, i16* %pin1.sub1, align 2 - %pin2.sub1 = getelementptr i16, i16* %pin2, i32 -1 - %In2.2 = load i16, i16* %pin2.sub1, align 2 - %In1.2 = load i16, i16* %pin1, align 2 - %pin2.sub2 = getelementptr i16, i16* %pin2, i32 -2 - %In2.3 = load i16, i16* %pin2.sub2, align 2 - %pin1.add1 = getelementptr i16, i16* %pin1, i32 1 - %In1.3 = load i16, i16* %pin1.add1, align 2 + %pin2.add1 = getelementptr i16, ptr %pin2, i32 1 + %In2 = load i16, ptr %pin2.add1, align 2 + %pin1.sub2 = getelementptr i16, ptr %pin1, i32 -2 + %In1 = load i16, ptr %pin1.sub2, align 2 + %In2.1 = load i16, ptr %pin2, align 2 + %pin1.sub1 = getelementptr i16, ptr %pin1, i32 -1 + %In1.1 = load i16, ptr %pin1.sub1, align 2 + %pin2.sub1 = getelementptr i16, ptr %pin2, i32 -1 + %In2.2 = load i16, ptr %pin2.sub1, align 2 + %In1.2 = load i16, ptr %pin1, align 2 + %pin2.sub2 = getelementptr i16, ptr %pin2, i32 -2 + %In2.3 = load i16, ptr %pin2.sub2, align 2 + %pin1.add1 = getelementptr i16, ptr %pin1, i32 1 + %In1.3 = load i16, ptr %pin1.add1, align 2 %sextIn2 = sext i16 %In2 to i32 %sextIn1 = sext i16 %In1 to i32 %sextIn2.1 = sext i16 %In2.1 to i32 @@ -243,8 +234,8 @@ for.body: %sext.mul.3 = sext i32 %mul.3 to i64 %add.3 = add nsw i64 %add.2, %sext.mul.3 %inc.3 = add i32 %i.011, 4 - %pin1.add4 = getelementptr i16, i16* %pin1, i32 4 - %pin2.sub4 = getelementptr i16, i16* %pin2, i32 -4 + %pin1.add4 = getelementptr i16, ptr %pin1, i32 4 + %pin2.sub4 = getelementptr i16, ptr %pin2, i32 -4 %niter.ncmp.3 = icmp eq i32 %unroll_iter, %inc.3 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } -- 2.7.4
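
A minimal sketch of the pattern behind the CHECK-line updates above, using a hypothetical function name (@widened_load) that does not appear in the tests: with typed pointers the widened i32 load that ARMParallelDSP forms needed an explicit bitcast of the i16 pointer, while with opaque pointers the load takes the ptr operand directly, which is why the [[..._CAST]] bitcast checks are dropped and only the load checks remain.

; Typed-pointer form (only parses in releases before opaque pointers
; became mandatory): the i16 pointer must be bitcast before the i32 load.
define i32 @widened_load_typed(i16* %p) {
entry:
  %cast = bitcast i16* %p to i32*
  %v = load i32, i32* %cast, align 2
  ret i32 %v
}

; Opaque-pointer form: no bitcast is needed, the load uses %p directly.
define i32 @widened_load_opaque(ptr %p) {
entry:
  %v = load i32, ptr %p, align 2
  ret i32 %v
}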