From 4a54e3eed347ca2bcb840c52c553892101afd252 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 31 Mar 2022 08:21:28 -0400 Subject: [PATCH] [x86] try to replace 0.0 in fcmp with negated operand This inverts a fold recently added to IR with: 3491f2f4b033 We can put -bidirectional on the Alive2 examples to show that the reverse transforms work: https://alive2.llvm.org/ce/z/8iVQwB The motivation for the IR change was to improve matching to 'fabs' in IR (see https://github.com/llvm/llvm-project/issues/38828 ), but it regressed x86 codegen for 'not-quite-fabs' patterns like (X > -X) ? X : -X. I.e., when there is no fast-math (nsz), the cmp+select is not a proper fabs operation, but it does map nicely to the unusual NAN semantics of MINSS/MAXSS. I drafted this as a target-independent fold, but it doesn't appear to help any other targets and seems to cause regressions for SystemZ at least. Differential Revision: https://reviews.llvm.org/D122726 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 ++++ llvm/test/CodeGen/X86/fp-stack-compare.ll | 10 ++-- llvm/test/CodeGen/X86/setcc-combine.ll | 98 ++++++++++--------------------- 3 files changed, 45 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8521b0e..80fec0a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51502,6 +51502,16 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, LHS.getValueType() == MVT::v4f32) return LowerVSETCC(SDValue(N, 0), Subtarget, DAG); + // X pred 0.0 --> X pred -X + // If the negation of X already exists, use it in the comparison. This removes + // the need to materialize 0.0 and allows matching to SSE's MIN/MAX + // instructions in patterns with a 'select' node. 
+ if (isNullFPScalarOrVectorConst(RHS)) { + SDVTList FNegVT = DAG.getVTList(OpVT); + if (SDNode *FNeg = DAG.getNodeIfExists(ISD::FNEG, FNegVT, {LHS})) + return DAG.getSetCC(DL, VT, LHS, SDValue(FNeg, 0), CC); + } + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/fp-stack-compare.ll b/llvm/test/CodeGen/X86/fp-stack-compare.ll index 83796b3..fabcbaf 100644 --- a/llvm/test/CodeGen/X86/fp-stack-compare.ll +++ b/llvm/test/CodeGen/X86/fp-stack-compare.ll @@ -8,13 +8,12 @@ define float @foo(float* %col) { ; BASE: # %bb.0: ; BASE-NEXT: movl {{[0-9]+}}(%esp), %eax ; BASE-NEXT: flds (%eax) -; BASE-NEXT: fldz -; BASE-NEXT: fucomp %st(1) +; BASE-NEXT: fld %st(0) +; BASE-NEXT: fchs +; BASE-NEXT: fucom %st(1) ; BASE-NEXT: fnstsw %ax ; BASE-NEXT: # kill: def $ah killed $ah killed $ax ; BASE-NEXT: sahf -; BASE-NEXT: fld %st(0) -; BASE-NEXT: fchs ; BASE-NEXT: ja .LBB0_2 ; BASE-NEXT: # %bb.1: ; BASE-NEXT: fstp %st(0) @@ -28,10 +27,9 @@ define float @foo(float* %col) { ; CMOV: # %bb.0: ; CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; CMOV-NEXT: flds (%eax) -; CMOV-NEXT: fldz -; CMOV-NEXT: fucompi %st(1), %st ; CMOV-NEXT: fld %st(0) ; CMOV-NEXT: fchs +; CMOV-NEXT: fucomi %st(1), %st ; CMOV-NEXT: fxch %st(1) ; CMOV-NEXT: fcmovnbe %st(1), %st ; CMOV-NEXT: fstp %st(1) diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll index 33d58ca..44994ab 100644 --- a/llvm/test/CodeGen/X86/setcc-combine.ll +++ b/llvm/test/CodeGen/X86/setcc-combine.ll @@ -364,12 +364,7 @@ define float @olt(float %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; CHECK-NEXT: xorps %xmm0, %xmm1 -; CHECK-NEXT: xorps %xmm2, %xmm2 -; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: cmpltss %xmm2, %xmm3 -; CHECK-NEXT: andps %xmm3, %xmm0 -; CHECK-NEXT: andnps %xmm1, %xmm3 -; CHECK-NEXT: orps %xmm3, %xmm0 +; CHECK-NEXT: minss %xmm1, %xmm0 ; CHECK-NEXT: retq %cmp = fcmp olt float %x, 0.0 %neg = fneg float %x @@ -382,11 +377,7 @@ define double 
@ogt(double %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] ; CHECK-NEXT: xorpd %xmm0, %xmm1 -; CHECK-NEXT: xorpd %xmm2, %xmm2 -; CHECK-NEXT: cmpltsd %xmm0, %xmm2 -; CHECK-NEXT: andpd %xmm2, %xmm0 -; CHECK-NEXT: andnpd %xmm1, %xmm2 -; CHECK-NEXT: orpd %xmm2, %xmm0 +; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: retq %neg = fneg double %x %cmp = fcmp ogt double %x, 0.0 @@ -395,28 +386,13 @@ define double @ogt(double %x) { } define <4 x float> @olt_swap(<4 x float> %x) { -; SSE2-LABEL: olt_swap: -; SSE2: # %bb.0: -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpltps %xmm1, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: andnps %xmm0, %xmm1 -; SSE2-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: andps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: olt_swap: -; SSE41: # %bb.0: -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: xorps %xmm2, %xmm2 -; SSE41-NEXT: cmpltps %xmm2, %xmm0 -; SSE41-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; SSE41-NEXT: xorps %xmm1, %xmm2 -; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: retq +; CHECK-LABEL: olt_swap: +; CHECK: # %bb.0: +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; CHECK-NEXT: xorps %xmm0, %xmm1 +; CHECK-NEXT: maxps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq %cmp = fcmp olt <4 x float> %x, zeroinitializer %neg = fneg <4 x float> %x %r = select <4 x i1> %cmp, <4 x float> %neg, <4 x float> %x @@ -424,28 +400,13 @@ define <4 x float> @olt_swap(<4 x float> %x) { } define <2 x double> @ogt_swap(<2 x double> %x) { -; SSE2-LABEL: ogt_swap: -; SSE2: # %bb.0: -; SSE2-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] -; SSE2-NEXT: xorpd %xmm0, %xmm2 -; SSE2-NEXT: xorpd %xmm1, %xmm1 -; SSE2-NEXT: cmpltpd %xmm0, %xmm1 -; SSE2-NEXT: andpd %xmm1, %xmm2 -; SSE2-NEXT: andnpd %xmm0, %xmm1 -; SSE2-NEXT: orpd 
%xmm2, %xmm1 -; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: ogt_swap: -; SSE41: # %bb.0: -; SSE41-NEXT: movapd %xmm0, %xmm1 -; SSE41-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] -; SSE41-NEXT: xorpd %xmm0, %xmm2 -; SSE41-NEXT: xorpd %xmm0, %xmm0 -; SSE41-NEXT: cmpltpd %xmm1, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 -; SSE41-NEXT: movapd %xmm1, %xmm0 -; SSE41-NEXT: retq +; CHECK-LABEL: ogt_swap: +; CHECK: # %bb.0: +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; CHECK-NEXT: xorpd %xmm0, %xmm1 +; CHECK-NEXT: minpd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: retq %neg = fneg <2 x double> %x %cmp = fcmp ogt <2 x double> %x, zeroinitializer %r = select <2 x i1> %cmp, <2 x double> %neg, <2 x double> %x @@ -455,23 +416,22 @@ define <2 x double> @ogt_swap(<2 x double> %x) { define <4 x float> @ole(<4 x float> %x) { ; SSE2-LABEL: ole: ; SSE2: # %bb.0: -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpleps %xmm1, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm1 +; SSE2-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; SSE2-NEXT: xorps %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: cmpleps %xmm2, %xmm1 +; SSE2-NEXT: andps %xmm1, %xmm2 ; SSE2-NEXT: andnps %xmm0, %xmm1 -; SSE2-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: andps %xmm2, %xmm0 -; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ole: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: xorps %xmm2, %xmm2 -; SSE41-NEXT: cmpleps %xmm2, %xmm0 ; SSE41-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; SSE41-NEXT: xorps %xmm1, %xmm2 +; SSE41-NEXT: xorps %xmm0, %xmm2 +; SSE41-NEXT: cmpleps %xmm2, %xmm0 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq @@ -486,7 +446,7 @@ define <2 x double> @oge(<2 x double> %x) { ; SSE2: # %bb.0: ; 
SSE2-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] ; SSE2-NEXT: xorpd %xmm0, %xmm2 -; SSE2-NEXT: xorpd %xmm1, %xmm1 +; SSE2-NEXT: movapd %xmm2, %xmm1 ; SSE2-NEXT: cmplepd %xmm0, %xmm1 ; SSE2-NEXT: andpd %xmm1, %xmm2 ; SSE2-NEXT: andnpd %xmm0, %xmm1 @@ -499,7 +459,7 @@ define <2 x double> @oge(<2 x double> %x) { ; SSE41-NEXT: movapd %xmm0, %xmm1 ; SSE41-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] ; SSE41-NEXT: xorpd %xmm0, %xmm2 -; SSE41-NEXT: xorpd %xmm0, %xmm0 +; SSE41-NEXT: movapd %xmm2, %xmm0 ; SSE41-NEXT: cmplepd %xmm1, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 ; SSE41-NEXT: movapd %xmm1, %xmm0 @@ -510,6 +470,8 @@ define <2 x double> @oge(<2 x double> %x) { ret <2 x double> %r } +; negative test - don't create an fneg to replace 0.0 operand + define double @ogt_no_fneg(double %x, double %y) { ; CHECK-LABEL: ogt_no_fneg: ; CHECK: # %bb.0: @@ -524,6 +486,8 @@ define double @ogt_no_fneg(double %x, double %y) { ret double %r } +; negative test - can't change the setcc for non-zero constant + define double @ogt_no_zero(double %x) { ; CHECK-LABEL: ogt_no_zero: ; CHECK: # %bb.0: -- 2.7.4