From f946c70130471baf66d668420d9a459d949e48e3 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qwu@ibm.com>
Date: Mon, 28 Nov 2022 09:55:11 -0500
Subject: [PATCH] [SLPVectorizer] Do Not Move Loads/Stores Beyond
 Stacksave/Stackrestore Boundaries

If left unchecked, the SLPVectorizer can move loads/stores below a
stackrestore. The move can cause issues if the loads/stores have pointer
operands from `alloca`s that are reset by the stackrestores. This patch
adds the dependency check.

The check is conservative, in that it does not check if the pointer
operands of the loads/stores are actually from `alloca`s that may be
reset. We did not observe any SPECCPU2017 performance degradation so
this simple fix seems sufficient.

The test could have been added to
`llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll`, but
that test has not been updated to use opaque pointers. I am not inclined
to add tests that still use typed pointers, or to refactor
`llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll` to use
opaque pointers in this patch. If desired, I will open a different patch
to refactor and consolidate the tests.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D138585
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp    |  9 +++--
 .../SLPVectorizer/X86/stackrestore-dependence.ll   | 41 ++++++++++++++++++++++
 2 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1715d4e..46348cb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10043,9 +10043,12 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
       }
 
       // In addition to the cases handle just above, we need to prevent
-      // allocas from moving below a stacksave. The stackrestore case
-      // is currently thought to be conservatism.
-      if (isa<AllocaInst>(BundleMember->Inst)) {
+      // allocas and loads/stores from moving below a stacksave or a
+      // stackrestore. Avoiding moving allocas below stackrestore is currently
+      // thought to be conservatism. Moving loads/stores below a stackrestore
+      // can lead to incorrect code.
+      if (isa<AllocaInst>(BundleMember->Inst) ||
+          BundleMember->Inst->mayReadOrWriteMemory()) {
         for (Instruction *I = BundleMember->Inst->getNextNode();
              I != ScheduleEnd; I = I->getNextNode()) {
           if (!match(I, m_Intrinsic<Intrinsic::stacksave>()) &&
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll
new file mode 100644
index 0000000..c7e51a5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -S < %s | FileCheck %s
+
+; The test checks that loads should not be moved below a stackrestore boundary.
+define void @stackrestore1(ptr %out) {
+; CHECK-LABEL: @stackrestore1(
+; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave()
+; CHECK-NEXT:    [[LOCAL_ALLOCA:%.*]] = alloca [16 x i8], align 4
+; CHECK-NEXT:    store <4 x float> <float 0x3FF3333340000000, float 0x3FF3333340000000, float 0x3FF3333340000000, float 0x3FF3333340000000>, ptr [[LOCAL_ALLOCA]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[LOCAL_ALLOCA]], align 4
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[STACK]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], ptr [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %stack = call ptr @llvm.stacksave()
+  %local_alloca = alloca [16 x i8], align 4
+  store float 0x3FF3333340000000, ptr %local_alloca, align 4
+  %addr1 = getelementptr inbounds i8, ptr %local_alloca, i64 4
+  store float 0x3FF3333340000000, ptr %addr1, align 4
+  %addr2 = getelementptr inbounds i8, ptr %local_alloca, i64 8
+  store float 0x3FF3333340000000, ptr %addr2, align 4
+  %addr3 = getelementptr inbounds i8, ptr %local_alloca, i64 12
+  store float 0x3FF3333340000000, ptr %addr3, align 4
+  %val0 = load float, ptr %local_alloca, align 4
+  %val1 = load float, ptr %addr1, align 4
+  %val2 = load float, ptr %addr2, align 4
+  %val3 = load float, ptr %addr3, align 4
+  call void @llvm.stackrestore(i8* %stack)
+  %outaddr2 = getelementptr inbounds float, ptr %out, i64 2
+  store float %val0, ptr %outaddr2, align 4
+  %outaddr3 = getelementptr inbounds float, ptr %out, i64 3
+  store float %val1, ptr %outaddr3, align 4
+  store float %val2, ptr %out
+  %outaddr1 = getelementptr inbounds float, ptr %out, i64 1
+  store float %val3, ptr %outaddr1, align 4
+  ret void
+}
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
-- 
2.7.4