From 26b2c114515a8d011a952fe414ac92417298ea00 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Fri, 9 Aug 2019 21:37:32 +0000
Subject: [PATCH] [DAGCombiner] exclude x*2.0 from normal negation profitability rules

This is the codegen part of fixing:
https://bugs.llvm.org/show_bug.cgi?id=32939

Even with the optimal/canonical IR that is ideally created by D65954,
we would reverse that transform in DAGCombiner and end up with the same
asm on AArch64 or x86.

I see two options for trying to correct this:

  1. Limit isNegatibleForFree() by special-casing the fmul pattern
     (this patch).
  2. Avoid creating (fmul X, 2.0) in the first place by adding a
     special-case transform to SelectionDAG::getNode() and/or
     SelectionDAGBuilder::visitFMul() that matches the transform done
     by DAGCombiner.

Option 1 seems like the less intrusive patch, but if there's some other
reason to prefer one option over the other, we can switch.

Differential Revision: https://reviews.llvm.org/D66016

llvm-svn: 368490
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 +++++
 llvm/test/CodeGen/AArch64/fadd-combines.ll    | 10 ++++------
 llvm/test/CodeGen/X86/fadd-combines.ll        |  8 ++++----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 63431d5..d9a7789 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -868,6 +868,11 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                                     Options, ForCodeSize, Depth + 1))
       return V;
 
+    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+        return 0;
+
     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI,
                               Options, ForCodeSize, Depth + 1);
diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll
index 7e34d2e..640c1a1 100644
--- a/llvm/test/CodeGen/AArch64/fadd-combines.ll
+++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll
@@ -169,10 +169,9 @@ define float @fadd_const_multiuse_attr(float %x) {
 define double @fmul2_negated(double %a, double %b, double %c) {
 ; CHECK-LABEL: fmul2_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov d3, #-2.00000000
-; CHECK-NEXT:    fmul d1, d1, d3
+; CHECK-NEXT:    fadd d1, d1, d1
 ; CHECK-NEXT:    fmul d1, d1, d2
-; CHECK-NEXT:    fadd d0, d0, d1
+; CHECK-NEXT:    fsub d0, d0, d1
 ; CHECK-NEXT:    ret
   %mul = fmul double %b, 2.0
   %mul1 = fmul double %mul, %c
@@ -183,10 +182,9 @@ define double @fmul2_negated(double %a, double %b, double %c) {
 define <2 x double> @fmul2_negated_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: fmul2_negated_vec:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov v3.2d, #-2.00000000
-; CHECK-NEXT:    fmul v1.2d, v1.2d, v3.2d
+; CHECK-NEXT:    fadd v1.2d, v1.2d, v1.2d
 ; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fsub v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    ret
   %mul = fmul <2 x double> %b, <double 2.0, double 2.0>
   %mul1 = fmul <2 x double> %mul, %c
diff --git a/llvm/test/CodeGen/X86/fadd-combines.ll b/llvm/test/CodeGen/X86/fadd-combines.ll
index 07e203f..048e5c5 100644
--- a/llvm/test/CodeGen/X86/fadd-combines.ll
+++ b/llvm/test/CodeGen/X86/fadd-combines.ll
@@ -252,9 +252,9 @@ define float @fadd_const_multiuse_attr(float %x) #0 {
 define double @fmul2_negated(double %a, double %b, double %c) {
 ; CHECK-LABEL: fmul2_negated:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    mulsd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    addsd %xmm1, %xmm1
 ; CHECK-NEXT:    mulsd %xmm2, %xmm1
-; CHECK-NEXT:    addsd %xmm1, %xmm0
+; CHECK-NEXT:    subsd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %mul = fmul double %b, 2.0
   %mul1 = fmul double %mul, %c
@@ -265,9 +265,9 @@ define double @fmul2_negated(double %a, double %b, double %c) {
 define <2 x double> @fmul2_negated_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: fmul2_negated_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    addpd %xmm1, %xmm1
 ; CHECK-NEXT:    mulpd %xmm2, %xmm1
-; CHECK-NEXT:    addpd %xmm1, %xmm0
+; CHECK-NEXT:    subpd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %mul = fmul <2 x double> %b, <double 2.0, double 2.0>
   %mul1 = fmul <2 x double> %mul, %c
-- 
2.7.4
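
For readers who want a standalone reproducer of the pattern this patch touches,
the sketch below simply mirrors the scalar fmul2_negated test from the patch;
the function name and RUN line are illustrative assumptions and are not part of
the change itself.

; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; Computes a - (b * 2.0) * c. Without the new special case,
; isNegatibleForFree() considers negating (fmul %b, 2.0) free because the
; constant can simply be flipped to -2.0, so the fsub is rewritten as an fadd
; and x86 ends up multiplying by -2.0 loaded from memory. With the special
; case, the multiply by 2.0 is left to the X + X canonicalization, and codegen
; emits addsd/subsd as in the CHECK lines above.
define double @fmul2_negated_repro(double %a, double %b, double %c) {
  %mul = fmul double %b, 2.0
  %mul1 = fmul double %mul, %c
  %sub = fsub double %a, %mul1
  ret double %sub
}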