From: Alexandros Lamprineas Date: Mon, 17 Dec 2018 10:45:43 +0000 (+0000) Subject: [AArch64] Re-run load/store optimizer after aggressive tail duplication X-Git-Tag: llvmorg-8.0.0-rc1~1944 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=490ae11717b8da8c50a6d73e6c392df8dfd28553;p=platform%2Fupstream%2Fllvm.git [AArch64] Re-run load/store optimizer after aggressive tail duplication The Load/Store Optimizer runs before Machine Block Placement. At O3 the Tail Duplication Threshold is set to 4 instructions and this can create new opportunities for the Load/Store Optimizer. It seems worthwhile to run it once again. llvm-svn: 349338 --- diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index d5e2470..32c8534 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -556,6 +556,12 @@ void AArch64PassConfig::addPreSched2() { } void AArch64PassConfig::addPreEmitPass() { + // Machine Block Placement might have created new opportunities when run + // at O3, where the Tail Duplication Threshold is set to 4 instructions. + // Run the load/store optimizer once more. + if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt) + addPass(createAArch64LoadStoreOptimizationPass()); + if (EnableA53Fix835769) addPass(createAArch64A53Fix835769()); // Relax conditional branch instructions if they're otherwise out of diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index a32da0b..29682b7 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -154,6 +154,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: MachinePostDominator Tree Construction ; CHECK-NEXT: Branch Probability Basic Block Placement +; CHECK-NEXT: AArch64 load / store optimization pass ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: AArch64 Compress Jump Tables diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-after-block-placement.ll b/llvm/test/CodeGen/AArch64/ldst-opt-after-block-placement.ll new file mode 100644 index 0000000..468f773 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ldst-opt-after-block-placement.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 -mtriple=aarch64-arm < %s | FileCheck %s + +; Run at O3 to make sure we can optimize load/store instructions after Machine +; Block Placement takes place using Tail Duplication Threshold = 4. + +define void @foo(i1 %cond, i64* %ptr) { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: tbz w0, #0, .LBB0_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: ldp x9, x8, [x1, #8] +; CHECK-NEXT: str xzr, [x1, #16] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.lt .LBB0_3 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_2: // %if.else +; CHECK-NEXT: ldp x8, x9, [x1] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ge .LBB0_4 +; CHECK-NEXT: .LBB0_3: // %exit1 +; CHECK-NEXT: str xzr, [x1, #8] +; CHECK-NEXT: .LBB0_4: // %exit2 +; CHECK-NEXT: ret +entry: + br i1 %cond, label %if.then, label %if.else + +if.then: + %0 = getelementptr inbounds i64, i64* %ptr, i64 2 + %1 = load i64, i64* %0, align 8 + store i64 0, i64* %0, align 8 + br label %if.end + +if.else: + %2 = load i64, i64* %ptr, align 8 + br label %if.end + +if.end: + %3 = phi i64 [ %1, %if.then ], [ %2, %if.else ] + %4 = getelementptr inbounds i64, i64* %ptr, i64 1 + %5 = load i64, i64* %4, align 8 + %6 = icmp slt i64 %3, %5 + br i1 %6, label %exit1, label %exit2 + +exit1: + store i64 0, i64* %4, align 8 + ret void + +exit2: + ret void +}