From a8a31fdd80ccd5c9939ac41a8751ab3e7b8e146d Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Sun, 19 Apr 2020 12:56:16 +0200 Subject: [PATCH] [Scalarizer] Fix a non-deterministic scatter order problem Summary: The indexing operator in Scatterer may result in building new instructions. When using multiple such operators in a function argument list the order in which we build instructions depend on argument evaluation order (which is undefined in C++). This patch avoid such problems by expanding the components using the [] operator prior to the function call. Problem was seen when comparing output, while builing LLVM with different compilers (clang vs gcc). Reviewers: foad, cameron.mcinally, uabelho Reviewed By: foad Subscribers: hiraditya, mgrang, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78455 --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 51 +++++++++------- llvm/test/Transforms/Scalarizer/scatter-order.ll | 76 ++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 20 deletions(-) create mode 100644 llvm/test/Transforms/Scalarizer/scatter-order.ll diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 8404188..3675e97 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -485,15 +485,17 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { unsigned NumElems = VT->getNumElements(); IRBuilder<> Builder(&I); - Scatterer Op0 = scatter(&I, I.getOperand(0)); - Scatterer Op1 = scatter(&I, I.getOperand(1)); - assert(Op0.size() == NumElems && "Mismatched binary operation"); - assert(Op1.size() == NumElems && "Mismatched binary operation"); + Scatterer VOp0 = scatter(&I, I.getOperand(0)); + Scatterer VOp1 = scatter(&I, I.getOperand(1)); + assert(VOp0.size() == NumElems && "Mismatched binary operation"); + assert(VOp1.size() == NumElems && "Mismatched binary operation"); ValueVector Res; Res.resize(NumElems); - for (unsigned Elem = 0; Elem < NumElems; ++Elem) - Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem], - I.getName() + ".i" + Twine(Elem)); + for (unsigned Elem = 0; Elem < NumElems; ++Elem) { + Value *Op0 = VOp0[Elem]; + Value *Op1 = VOp1[Elem]; + Res[Elem] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Elem)); + } gather(&I, Res); return true; } @@ -576,24 +578,31 @@ bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) { unsigned NumElems = VT->getNumElements(); IRBuilder<> Builder(&SI); - Scatterer Op1 = scatter(&SI, SI.getOperand(1)); - Scatterer Op2 = scatter(&SI, SI.getOperand(2)); - assert(Op1.size() == NumElems && "Mismatched select"); - assert(Op2.size() == NumElems && "Mismatched select"); + Scatterer VOp1 = scatter(&SI, SI.getOperand(1)); + Scatterer VOp2 = scatter(&SI, SI.getOperand(2)); + assert(VOp1.size() == NumElems && "Mismatched select"); + assert(VOp2.size() == NumElems && "Mismatched select"); ValueVector Res; Res.resize(NumElems); if (SI.getOperand(0)->getType()->isVectorTy()) { - Scatterer Op0 = scatter(&SI, SI.getOperand(0)); - assert(Op0.size() == NumElems && "Mismatched select"); - for (unsigned I = 0; I < NumElems; ++I) - Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I], + Scatterer VOp0 = scatter(&SI, SI.getOperand(0)); + assert(VOp0.size() == NumElems && "Mismatched select"); + for (unsigned I = 0; I < NumElems; ++I) { + Value *Op0 = VOp0[I]; + Value *Op1 = VOp1[I]; + Value *Op2 = VOp2[I]; + Res[I] = Builder.CreateSelect(Op0, Op1, Op2, SI.getName() + ".i" + Twine(I)); + } } else { Value *Op0 = SI.getOperand(0); - for (unsigned I = 0; I < NumElems; ++I) - Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I], + for (unsigned I = 0; I < NumElems; ++I) { + Value *Op1 = VOp1[I]; + Value *Op2 = VOp2[I]; + Res[I] = Builder.CreateSelect(Op0, Op1, Op2, SI.getName() + ".i" + Twine(I)); + } } gather(&SI, Res); return true; @@ -822,14 +831,16 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) { unsigned NumElems = Layout.VecTy->getNumElements(); IRBuilder<> Builder(&SI); - Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); - Scatterer Val = scatter(&SI, FullValue); + Scatterer VPtr = scatter(&SI, SI.getPointerOperand()); + Scatterer VVal = scatter(&SI, FullValue); ValueVector Stores; Stores.resize(NumElems); for (unsigned I = 0; I < NumElems; ++I) { unsigned Align = Layout.getElemAlign(I); - Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], MaybeAlign(Align)); + Value *Val = VVal[I]; + Value *Ptr = VPtr[I]; + Stores[I] = Builder.CreateAlignedStore(Val, Ptr, MaybeAlign(Align)); } transferMetadataAndIRFlags(&SI, Stores); return true; diff --git a/llvm/test/Transforms/Scalarizer/scatter-order.ll b/llvm/test/Transforms/Scalarizer/scatter-order.ll new file mode 100644 index 0000000..df072ab --- /dev/null +++ b/llvm/test/Transforms/Scalarizer/scatter-order.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s +; RUN: opt %s -passes='function(scalarizer)' -scalarize-load-store -S | FileCheck %s + +; This verifies that the order of extract element instructions is +; deterministic. In the past we could end up with different results depending +; on the compiler used (due to argument evaluation order being undefined in +; C++). The order of the extracts is not really important for correctness of +; the result, but when debugging and creating test cases it is helpful if we +; get the same out put regardless of which compiler we use when building the +; compiler. + +define <2 x i32> @test1(i1 %b, <2 x i32> %i, <2 x i32> %j) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = select i1 [[B:%.*]], i32 [[I_I0]], i32 [[J_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = select i1 [[B]], i32 [[I_I1]], i32 [[J_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[RES]] +; + %res = select i1 %b, <2 x i32> %i, <2 x i32> %j + ret <2 x i32> %res +} + +define <2 x i32> @test2(<2 x i1> %b, <2 x i32> %i, <2 x i32> %j) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i1> [[B:%.*]], i32 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = select i1 [[B_I0]], i32 [[I_I0]], i32 [[J_I0]] +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i1> [[B]], i32 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = select i1 [[B_I1]], i32 [[I_I1]], i32 [[J_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[RES]] +; + %res = select <2 x i1> %b, <2 x i32> %i, <2 x i32> %j + ret <2 x i32> %res +} + +define <2 x i32> @test3(<2 x i32> %i, <2 x i32> %j) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[RES]] +; + %res = add nuw nsw <2 x i32> %i, %j + ret <2 x i32> %res +} + +define void @test4(<2 x i32>* %ptr, <2 x i32> %val) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0 +; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <2 x i32>* [[PTR:%.*]] to i32* +; CHECK-NEXT: store i32 [[VAL_I0]], i32* [[PTR_I0]], align 8 +; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1 +; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i32, i32* [[PTR_I0]], i32 1 +; CHECK-NEXT: store i32 [[VAL_I1]], i32* [[PTR_I1]], align 4 +; CHECK-NEXT: ret void +; + store <2 x i32> %val, <2 x i32> *%ptr + ret void +} + -- 2.7.4