From f5dbbf494ff0020978d7cdd052980e9ab9c05edb Mon Sep 17 00:00:00 2001 From: khei4 Date: Mon, 15 May 2023 22:33:15 +0900 Subject: [PATCH] [ConstantFold] use StoreSize for VectorType folding Differential Revision: https://reviews.llvm.org/D150515 Reviewed By: nikic --- llvm/lib/Analysis/ConstantFolding.cpp | 10 ++++++++-- llvm/test/Transforms/InstCombine/load-gep-overalign.ll | 16 ++++++---------- llvm/test/Transforms/InstCombine/load.ll | 14 ++++++++++++++ 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index ce6f334..7a4ea74 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -501,16 +501,22 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, if (isa<ConstantArray>(C) || isa<ConstantVector>(C) || isa<ConstantDataSequential>(C)) { - uint64_t NumElts; + uint64_t NumElts, EltSize; Type *EltTy; if (auto *AT = dyn_cast<ArrayType>(C->getType())) { NumElts = AT->getNumElements(); EltTy = AT->getElementType(); + EltSize = DL.getTypeAllocSize(EltTy); } else { NumElts = cast<FixedVectorType>(C->getType())->getNumElements(); EltTy = cast<FixedVectorType>(C->getType())->getElementType(); + // TODO: For non-byte-sized vectors, current implementation assumes there is + // padding to the next byte boundary between elements. + if (!DL.typeSizeEqualsStoreSize(EltTy)) + return false; + + EltSize = DL.getTypeStoreSize(EltTy); } - uint64_t EltSize = DL.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; diff --git a/llvm/test/Transforms/InstCombine/load-gep-overalign.ll b/llvm/test/Transforms/InstCombine/load-gep-overalign.ll index 70d5119..eb35133 100644 --- a/llvm/test/Transforms/InstCombine/load-gep-overalign.ll +++ b/llvm/test/Transforms/InstCombine/load-gep-overalign.ll @@ -11,10 +11,6 @@ define void @test_vector_load_i8() { ; Access and report each individual byte in @foo. 
; OVERALIGNED and NATURAL should have the same result, because the layout of vectors ignores ; element type alignment, and thus the representation of @foo is the same in both cases. -; -; TODO: The OVERALIGNED result is incorrect, as apparently padding bytes -; are assumed as they would appear in an array. In vectors, there is no padding. -; ; NATURAL-LABEL: @test_vector_load_i8( ; NATURAL-NEXT: call void @report(i64 0, i8 1) ; NATURAL-NEXT: call void @report(i64 1, i8 35) @@ -29,12 +25,12 @@ define void @test_vector_load_i8() { ; OVERALIGNED-LABEL: @test_vector_load_i8( ; OVERALIGNED-NEXT: call void @report(i64 0, i8 1) ; OVERALIGNED-NEXT: call void @report(i64 1, i8 35) -; OVERALIGNED-NEXT: call void @report(i64 2, i8 0) -; OVERALIGNED-NEXT: call void @report(i64 3, i8 0) -; OVERALIGNED-NEXT: call void @report(i64 4, i8 69) -; OVERALIGNED-NEXT: call void @report(i64 5, i8 103) -; OVERALIGNED-NEXT: call void @report(i64 6, i8 0) -; OVERALIGNED-NEXT: call void @report(i64 7, i8 0) +; OVERALIGNED-NEXT: call void @report(i64 2, i8 69) +; OVERALIGNED-NEXT: call void @report(i64 3, i8 103) +; OVERALIGNED-NEXT: call void @report(i64 4, i8 -119) +; OVERALIGNED-NEXT: call void @report(i64 5, i8 -85) +; OVERALIGNED-NEXT: call void @report(i64 6, i8 -51) +; OVERALIGNED-NEXT: call void @report(i64 7, i8 -17) ; OVERALIGNED-NEXT: ret void ; %ptr0 = getelementptr i8, ptr @foo, i64 0 diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll index 0eee309..cd00ac7 100644 --- a/llvm/test/Transforms/InstCombine/load.ll +++ b/llvm/test/Transforms/InstCombine/load.ll @@ -413,3 +413,17 @@ define i32 @load_via_strip_invariant_group() { %d = load i32, ptr %b ret i32 %d } + +; TODO: For non-byte-sized vectors, current implementation assumes there is +; padding to the next byte boundary between elements. 
+@foo = constant <2 x i4> <i4 u0x1, i4 u0x2>, align 8 + +define i4 @test_vector_load_i4_non_byte_sized() { +; CHECK-LABEL: @test_vector_load_i4_non_byte_sized( +; CHECK-NEXT: [[RES0:%.*]] = load i4, ptr @foo, align 8 +; CHECK-NEXT: ret i4 [[RES0]] +; + %ptr0 = getelementptr i8, ptr @foo, i64 0 + %res0 = load i4, ptr %ptr0, align 1 + ret i4 %res0 +} -- 2.7.4