From eb7c515d664c0ae6c17ad4094343b29cbd33719a Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 22 Dec 2022 00:59:56 +0300 Subject: [PATCH] [NFC][SROA] More tests for promotion with variable index Also, delete the InstCombine test, it's not going to be relevant. --- .../InstCombine/widen-load-of-small-alloca.ll | 618 --------------------- .../Transforms/SROA/widen-load-of-small-alloca.ll | 136 +++++ 2 files changed, 136 insertions(+), 618 deletions(-) delete mode 100644 llvm/test/Transforms/InstCombine/widen-load-of-small-alloca.ll diff --git a/llvm/test/Transforms/InstCombine/widen-load-of-small-alloca.ll b/llvm/test/Transforms/InstCombine/widen-load-of-small-alloca.ll deleted file mode 100644 index a3c4182..0000000 --- a/llvm/test/Transforms/InstCombine/widen-load-of-small-alloca.ll +++ /dev/null @@ -1,618 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=instcombine -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64 -; RUN: opt -passes=instcombine -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32 -; RUN: opt -passes=instcombine -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64 -; RUN: opt -passes=instcombine -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32 - -define void @load-1byte-chunk-of-1byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-1byte-chunk-of-1byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [1 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <1 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <1 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v1i8(<1 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v1i8(<1 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [1 x i8], align 64 - %init = load <1 x i8>, ptr %src, align 1 - store <1 x i8> %init, ptr %intermediate, align 64 - call void @use.v1i8(<1 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v1i8(<1 x i8> %chunk) - ret void -} - -define void @load-1byte-chunk-of-2byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-1byte-chunk-of-2byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v1i8(<1 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [2 x i8], align 64 - %init = load <2 x i8>, ptr %src, align 1 - store <2 x i8> %init, ptr %intermediate, align 64 - call void @use.v2i8(<2 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v1i8(<1 x i8> %chunk) - ret void -} - -define void @load-2byte-chunk-of-2byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-2byte-chunk-of-2byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [2 x i8], align 64 - %init = load <2 x i8>, ptr %src, align 1 - store <2 x i8> %init, ptr %intermediate, align 64 - call void @use.v2i8(<2 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v2i8(<2 x i8> %chunk) - ret void -} - -define void @load-1byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-1byte-chunk-of-4byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v4i8(<4 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v1i8(<1 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [4 x i8], align 64 - %init = load <4 x i8>, ptr %src, align 1 - store <4 x i8> %init, ptr %intermediate, align 64 - call void @use.v4i8(<4 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v1i8(<1 x i8> %chunk) - ret void -} - -define void @load-2byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-2byte-chunk-of-4byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v4i8(<4 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [4 x i8], align 64 - %init = load <4 x i8>, ptr %src, align 1 - store <4 x i8> %init, ptr %intermediate, align 64 - call void @use.v4i8(<4 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v2i8(<2 x i8> %chunk) - ret void -} - -define void @load-4byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-4byte-chunk-of-4byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v4i8(<4 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v4i8(<4 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [4 x i8], align 64 - %init = load <4 x i8>, ptr %src, align 1 - store <4 x i8> %init, ptr %intermediate, align 64 - call void @use.v4i8(<4 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v4i8(<4 x i8> %chunk) - ret void -} - -define void @load-1byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-1byte-chunk-of-8byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v1i8(<1 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [8 x i8], align 64 - %init = load <8 x i8>, ptr %src, align 1 - store <8 x i8> %init, ptr %intermediate, align 64 - call void @use.v8i8(<8 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v1i8(<1 x i8> %chunk) - ret void -} - -define void @load-2byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [8 x i8], align 64 - %init = load <8 x i8>, ptr %src, align 1 - store <8 x i8> %init, ptr %intermediate, align 64 - call void @use.v8i8(<8 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v2i8(<2 x i8> %chunk) - ret void -} - -define void @load-4byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-4byte-chunk-of-8byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v4i8(<4 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [8 x i8], align 64 - %init = load <8 x i8>, ptr %src, align 1 - store <8 x i8> %init, ptr %intermediate, align 64 - call void @use.v8i8(<8 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v4i8(<4 x i8> %chunk) - ret void -} - -define void @load-8byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-8byte-chunk-of-8byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [8 x i8], align 64 - %init = load <8 x i8>, ptr %src, align 1 - store <8 x i8> %init, ptr %intermediate, align 64 - call void @use.v8i8(<8 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v8i8(<8 x i8> %chunk) - ret void -} - -define void @load-1byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-1byte-chunk-of-16byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v16i8(<16 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v1i8(<1 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [16 x i8], align 64 - %init = load <16 x i8>, ptr %src, align 1 - store <16 x i8> %init, ptr %intermediate, align 64 - call void @use.v16i8(<16 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v1i8(<1 x i8> %chunk) - ret void -} - -define void @load-2byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-2byte-chunk-of-16byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v16i8(<16 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [16 x i8], align 64 - %init = load <16 x i8>, ptr %src, align 1 - store <16 x i8> %init, ptr %intermediate, align 64 - call void @use.v16i8(<16 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v2i8(<2 x i8> %chunk) - ret void -} - -define void @load-4byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-4byte-chunk-of-16byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v16i8(<16 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v4i8(<4 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [16 x i8], align 64 - %init = load <16 x i8>, ptr %src, align 1 - store <16 x i8> %init, ptr %intermediate, align 64 - call void @use.v16i8(<16 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v4i8(<4 x i8> %chunk) - ret void -} - -define void @load-8byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-8byte-chunk-of-16byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v16i8(<16 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [16 x i8], align 64 - %init = load <16 x i8>, ptr %src, align 1 - store <16 x i8> %init, ptr %intermediate, align 64 - call void @use.v16i8(<16 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v8i8(<8 x i8> %chunk) - ret void -} - -define void @load-16byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-16byte-chunk-of-16byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v16i8(<16 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v16i8(<16 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [16 x i8], align 64 - %init = load <16 x i8>, ptr %src, align 1 - store <16 x i8> %init, ptr %intermediate, align 64 - call void @use.v16i8(<16 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v16i8(<16 x i8> %chunk) - ret void -} - -define void @load-1byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-1byte-chunk-of-32byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v32i8(<32 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v1i8(<1 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [32 x i8], align 64 - %init = load <32 x i8>, ptr %src, align 1 - store <32 x i8> %init, ptr %intermediate, align 64 - call void @use.v32i8(<32 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v1i8(<1 x i8> %chunk) - ret void -} - -define void @load-2byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-2byte-chunk-of-32byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v32i8(<32 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [32 x i8], align 64 - %init = load <32 x i8>, ptr %src, align 1 - store <32 x i8> %init, ptr %intermediate, align 64 - call void @use.v32i8(<32 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v2i8(<2 x i8> %chunk) - ret void -} - -define void @load-4byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-4byte-chunk-of-32byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v32i8(<32 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v4i8(<4 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [32 x i8], align 64 - %init = load <32 x i8>, ptr %src, align 1 - store <32 x i8> %init, ptr %intermediate, align 64 - call void @use.v32i8(<32 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v4i8(<4 x i8> %chunk) - ret void -} - -define void @load-8byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-8byte-chunk-of-32byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v32i8(<32 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [32 x i8], align 64 - %init = load <32 x i8>, ptr %src, align 1 - store <32 x i8> %init, ptr %intermediate, align 64 - call void @use.v32i8(<32 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v8i8(<8 x i8> %chunk) - ret void -} - -define void @load-16byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-16byte-chunk-of-32byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v32i8(<32 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v16i8(<16 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [32 x i8], align 64 - %init = load <32 x i8>, ptr %src, align 1 - store <32 x i8> %init, ptr %intermediate, align 64 - call void @use.v32i8(<32 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v16i8(<16 x i8> %chunk) - ret void -} - -define void @load-32byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-32byte-chunk-of-32byte-alloca( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v32i8(<32 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <32 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v32i8(<32 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [32 x i8], align 64 - %init = load <32 x i8>, ptr %src, align 1 - store <32 x i8> %init, ptr %intermediate, align 64 - call void @use.v32i8(<32 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff - %chunk = load <32 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v32i8(<32 x i8> %chunk) - ret void -} - -;; Special test - -define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step(ptr %src, i64 %byteOff, ptr %escape) { -; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step( -; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 -; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 -; CHECK-ALL-NEXT: call void @use.v8i8(<8 x i8> [[INIT]]) -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE]]) -; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] -; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 2 -; CHECK-ALL-NEXT: call void @use.ptr(ptr nonnull [[INTERMEDIATE_OFF_ADDR]]) -; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) -; CHECK-ALL-NEXT: ret void -; - %intermediate = alloca [8 x i8], align 64 - %init = load <8 x i8>, ptr %src, align 1 - store <8 x i8> %init, ptr %intermediate, align 64 - call void @use.v8i8(<8 x i8> %init) - call void @use.ptr(ptr %intermediate) - - %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff - %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 - - call void @use.ptr(ptr %intermediate.off.addr) - call void @use.v2i8(<2 x i8> %chunk) - ret void -} - -declare void @use.ptr(ptr) -declare void @use.v1i8(<1 x i8>) -declare void @use.v2i8(<2 x i8>) -declare void @use.v4i8(<4 x i8>) -declare void @use.v8i8(<8 x i8>) -declare void @use.v16i8(<16 x i8>) -declare void @use.v32i8(<32 x i8>) -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-ALL: {{.*}} -; CHECK-BE-32: {{.*}} -; CHECK-BE-64: {{.*}} -; CHECK-LE-32: {{.*}} -; CHECK-LE-64: {{.*}} -; CHECK-SCALAR: {{.*}} -; CHECK-SCALAR-32: {{.*}} -; CHECK-SCALAR-64: {{.*}} diff --git a/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll b/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll index 4e532b9..4cd4ae2 100644 --- a/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll +++ b/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll @@ -489,6 +489,142 @@ define void @store-volatile-2byte-chunk-of-8byte-alloca-with-2byte-step(ptr %src ret void } +define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-beforehand(ptr %src, i64 %byteOff) { +; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-beforehand( +; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 +; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 +; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR_CST:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_CST]], i64 [[BYTEOFF:%.*]] +; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 +; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) +; CHECK-ALL-NEXT: ret void +; + %intermediate = alloca [8 x i8], align 64 + %init = load <8 x i8>, ptr %src, align 1 + store <8 x i8> %init, ptr %intermediate, align 64 + %intermediate.off.addr.cst = getelementptr inbounds i16, ptr %intermediate, i64 1 + %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.cst, i64 %byteOff + %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 + call void @use.v2i8(<2 x i8> %chunk) + ret void +} + +define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-afterwards(ptr %src, i64 %byteOff) { +; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-afterwards( +; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 +; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 +; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR_VARIABLE:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_VARIABLE]], i64 1 +; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 +; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) +; CHECK-ALL-NEXT: ret void +; + %intermediate = alloca [8 x i8], align 64 + %init = load <8 x i8>, ptr %src, align 1 + store <8 x i8> %init, ptr %intermediate, align 64 + %intermediate.off.addr.variable = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff + %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.variable, i64 1 + %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 + call void @use.v2i8(<2 x i8> %chunk) + ret void +} + +define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-variable-offset-inbetween-constant-offsets(ptr %src, i64 %byteOff) { +; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-variable-offset-inbetween-constant-offsets( +; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 +; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 +; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR_CST:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR_VARIABLE:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_CST]], i64 [[BYTEOFF:%.*]] +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_VARIABLE]], i64 1 +; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 +; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) +; CHECK-ALL-NEXT: ret void +; + %intermediate = alloca [8 x i8], align 64 + %init = load <8 x i8>, ptr %src, align 1 + store <8 x i8> %init, ptr %intermediate, align 64 + %intermediate.off.addr.cst = getelementptr inbounds i16, ptr %intermediate, i64 1 + %intermediate.off.addr.variable = getelementptr inbounds i16, ptr %intermediate.off.addr.cst, i64 %byteOff + %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.variable, i64 1 + %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 + call void @use.v2i8(<2 x i8> %chunk) + ret void +} + +define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-geps(ptr %src, i64 %byteOff0, i64 %byteOff1, i1 %cond) { +; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-geps( +; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 +; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 +; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF0:%.*]] +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF1:%.*]] +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]] +; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 +; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) +; CHECK-ALL-NEXT: ret void +; + %intermediate = alloca [8 x i8], align 64 + %init = load <8 x i8>, ptr %src, align 1 + store <8 x i8> %init, ptr %intermediate, align 64 + %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff0 + %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff1 + %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1 + %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 + call void @use.v2i8(<2 x i8> %chunk) + ret void +} + +define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-and-const-geps(ptr %src, i64 %byteOff0, i64 %byteOff1, i1 %cond) { +; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-and-const-geps( +; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 +; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 +; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF1:%.*]] +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]] +; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 +; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) +; CHECK-ALL-NEXT: ret void +; + %intermediate = alloca [8 x i8], align 64 + %init = load <8 x i8>, ptr %src, align 1 + store <8 x i8> %init, ptr %intermediate, align 64 + %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 1 + %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff1 + %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1 + %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 + call void @use.v2i8(<2 x i8> %chunk) + ret void +} + +define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-variable-gep-of-select-of-const-geps(ptr %src, i64 %byteOff, i1 %cond) { +; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-variable-gep-of-select-of-const-geps( +; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 +; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 +; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 0 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 2 +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]] +; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR_VAR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR]], i64 [[BYTEOFF:%.*]] +; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR_VAR]], align 1 +; CHECK-ALL-NEXT: call void @use.v2i8(<2 x i8> [[CHUNK]]) +; CHECK-ALL-NEXT: ret void +; + %intermediate = alloca [8 x i8], align 64 + %init = load <8 x i8>, ptr %src, align 1 + store <8 x i8> %init, ptr %intermediate, align 64 + %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 0 + %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 2 + %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1 + %intermediate.off.addr.var = getelementptr inbounds i16, ptr %intermediate.off.addr, i64 %byteOff + %chunk = load <2 x i8>, ptr %intermediate.off.addr.var, align 1 + call void @use.v2i8(<2 x i8> %chunk) + ret void +} + declare void @use.v1i8(<1 x i8>) declare void @use.v2i8(<2 x i8>) declare void @use.v4i8(<4 x i8>) -- 2.7.4