From 7993d61177c8c6e9aa238e57a684c6cd3db3b571 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 12 Sep 2020 23:41:48 -0500 Subject: [PATCH] [Attributor] Use smarter way to determine alignment of GEPs Use same logic existing in other places to deal with base case GEPs. Add the original Attributor talk example. --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 18 ++++- .../ArgumentPromotion/aggregate-promote.ll | 2 +- llvm/test/Transforms/Attributor/align.ll | 78 ++++++++++++++++++---- 3 files changed, 81 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 2e0f034..6b07bbd 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -3848,9 +3848,23 @@ struct AAAlignFloating : AAAlignImpl { AAAlign::StateType &T, bool Stripped) -> bool { const auto &AA = A.getAAFor(*this, IRPosition::value(V)); if (!Stripped && this == &AA) { + int64_t Offset; + unsigned Alignment = 1; + if (const Value *Base = + GetPointerBaseWithConstantOffset(&V, Offset, DL)) { + Align PA = Base->getPointerAlignment(DL); + // BasePointerAddr + Offset = Alignment * Q for some integer Q. + // So we can say that the maximum power of two which is a divisor of + // gcd(Offset, Alignment) is an alignment. + + uint32_t gcd = greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), + uint32_t(PA.value())); + Alignment = llvm::PowerOf2Floor(gcd); + } else { + Alignment = V.getPointerAlignment(DL).value(); + } // Use only IR information if we did not strip anything. - Align PA = V.getPointerAlignment(DL); - T.takeKnownMaximum(PA.value()); + T.takeKnownMaximum(Alignment); T.indicatePessimisticFixpoint(); } else { // Use abstract attribute information. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll index 8dd54ce..b175e2b 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll @@ -26,7 +26,7 @@ define internal i32 @test(%T* %p) { ; IS__CGSCC____-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 3 ; IS__CGSCC____-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 2 ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 -; IS__CGSCC____-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 +; IS__CGSCC____-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 8 ; IS__CGSCC____-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] ; IS__CGSCC____-NEXT: ret i32 [[V]] ; diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index e859194..0c36d4a 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -408,13 +408,13 @@ define i32* @test10a(i32* align 32 %p) { ; NOT_CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; NOT_CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; NOT_CGSCC_OPM: t: -; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] -; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] +; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; NOT_CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; NOT_CGSCC_OPM-NEXT: br label [[E:%.*]] ; NOT_CGSCC_OPM: f: ; NOT_CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 8 -; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 32 ; NOT_CGSCC_OPM-NEXT: br label [[E]] ; NOT_CGSCC_OPM: e: ; NOT_CGSCC_OPM-NEXT: [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ] @@ -427,13 +427,13 @@ define i32* @test10a(i32* align 32 %p) { ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC_OPM: t: -; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] -; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10a(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] +; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; IS__CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; IS__CGSCC_OPM-NEXT: br label [[E:%.*]] ; IS__CGSCC_OPM: f: ; IS__CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 8 -; IS__CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; IS__CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 32 ; IS__CGSCC_OPM-NEXT: br label [[E]] ; IS__CGSCC_OPM: e: ; IS__CGSCC_OPM-NEXT: [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ] @@ -470,13 +470,13 @@ define i32* @test10b(i32* align 32 %p) { ; NOT_CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; NOT_CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; NOT_CGSCC_OPM: t: -; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] -; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR3]] +; NOT_CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; NOT_CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; NOT_CGSCC_OPM-NEXT: br label [[E:%.*]] ; NOT_CGSCC_OPM: f: ; NOT_CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 -8 -; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; NOT_CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 32 ; NOT_CGSCC_OPM-NEXT: br label [[E]] ; NOT_CGSCC_OPM: e: ; NOT_CGSCC_OPM-NEXT: [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ] @@ -489,13 +489,13 @@ define i32* @test10b(i32* align 32 %p) { ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC_OPM: t: -; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] -; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 4 +; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call align 32 i32* @test10b(i32* nofree nonnull align 32 dereferenceable(4) "no-capture-maybe-returned" [[P]]) [[ATTR4]] +; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[R]], align 32 ; IS__CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr i32, i32* [[P]], i32 8 ; IS__CGSCC_OPM-NEXT: br label [[E:%.*]] ; IS__CGSCC_OPM: f: ; IS__CGSCC_OPM-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[P]], i32 -8 -; IS__CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 4 +; IS__CGSCC_OPM-NEXT: store i32 -1, i32* [[G1]], align 32 ; IS__CGSCC_OPM-NEXT: br label [[E]] ; IS__CGSCC_OPM: e: ; IS__CGSCC_OPM-NEXT: [[PHI:%.*]] = phi i32* [ [[G0]], [[T]] ], [ [[G1]], [[F]] ] @@ -992,6 +992,56 @@ exit: ret i32 0 } +define i32* @checkAndAdvance(i32* align(16) %p) { +; NOT_CGSCC_OPM: Function Attrs: nounwind +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@checkAndAdvance +; NOT_CGSCC_OPM-SAME: (i32* nonnull readonly align 16 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) [[ATTR2]] { +; NOT_CGSCC_OPM-NEXT: entry: +; NOT_CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[P]], align 16 +; NOT_CGSCC_OPM-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NOT_CGSCC_OPM-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; NOT_CGSCC_OPM: if.then: +; NOT_CGSCC_OPM-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 +; NOT_CGSCC_OPM-NEXT: [[CALL:%.*]] = call nonnull align 16 i32* @checkAndAdvance(i32* nonnull readonly align 16 "no-capture-maybe-returned" [[ADD_PTR]]) [[ATTR2]] +; NOT_CGSCC_OPM-NEXT: br label [[RETURN]] +; NOT_CGSCC_OPM: return: +; NOT_CGSCC_OPM-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL]], [[IF_THEN]] ], [ [[P]], [[ENTRY:%.*]] ] +; NOT_CGSCC_OPM-NEXT: call void @user_i32_ptr(i32* noalias nocapture nonnull readnone align 16 [[RETVAL_0]]) [[ATTR2]] +; NOT_CGSCC_OPM-NEXT: ret i32* [[RETVAL_0]] +; +; IS__CGSCC_OPM: Function Attrs: nounwind +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@checkAndAdvance +; IS__CGSCC_OPM-SAME: (i32* nonnull readonly align 16 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) [[ATTR3]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[P]], align 16 +; IS__CGSCC_OPM-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; IS__CGSCC_OPM: if.then: +; IS__CGSCC_OPM-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call nonnull align 16 i32* @checkAndAdvance(i32* nonnull readonly align 16 "no-capture-maybe-returned" [[ADD_PTR]]) [[ATTR3]] +; IS__CGSCC_OPM-NEXT: br label [[RETURN]] +; IS__CGSCC_OPM: return: +; IS__CGSCC_OPM-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL]], [[IF_THEN]] ], [ [[P]], [[ENTRY:%.*]] ] +; IS__CGSCC_OPM-NEXT: call void @user_i32_ptr(i32* noalias nocapture nonnull readnone align 16 [[RETVAL_0]]) [[ATTR3]] +; IS__CGSCC_OPM-NEXT: ret i32* [[RETVAL_0]] +; +entry: + %0 = load i32, i32* %p, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %call = call i32* @checkAndAdvance(i32* nonnull %add.ptr) + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32* [ %call, %if.then ], [ %p, %entry ] + call void @user_i32_ptr(i32* %retval.0) + ret i32* %retval.0 +} + + attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } attributes #2 = { null_pointer_is_valid } -- 2.7.4