From: Eli Friedman <efriedma@quicinc.com>
Date: Wed, 23 Sep 2020 21:10:33 +0000 (-0700)
Subject: [SelectionDAG][GISel] Make LegalizeDAG lower FNEG using integer ops.
X-Git-Tag: llvmorg-13-init~11115
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3f739f736b8fed6f4d63569f56c985ef04b21cd1;p=platform%2Fupstream%2Fllvm.git

[SelectionDAG][GISel] Make LegalizeDAG lower FNEG using integer ops.

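Previously, if a floating-point type was legal, but FNEG wasn't legal,
we would use FSUB.  Instead, we should use integer ops, to preserve the
semantics of FNEG: it only flips the sign bit, whereas an FSUB from -0.0
is an arithmetic operation that can raise floating-point exceptions and
quiets signaling NaNs.  (Alternatively, there's a compiler-rt call we
could use, but there isn't much reason to use that.)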

It turns out we actually are still using this obscure codepath in a few
cases: on some targets, we have "legal" floating-point types that don't
actually support any floating-point operations.  In particular, ARM and
AArch64 are using this path.

The implementation for SelectionDAG is pretty simple because we can
reuse the infrastructure from FCOPYSIGN.

See also 9a3dc3e, the corresponding change to type legalization.

Also includes a "bonus" change to STRICT_FSUB legalization, so we can
lower a STRICT_FSUB to a float libcall.

Includes the changes to both LegalizeDAG and GlobalISel so we don't have
inconsistent results in the future.

Fixes https://bugs.llvm.org/show_bug.cgi?id=46792 .

Differential Revision: https://reviews.llvm.org/D84287
---

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f4c8acc..196dbf2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2881,16 +2881,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     // represent them.
     if (Ty.isVector())
       return UnableToLegalize;
-    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
-    Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
-    if (!ZeroTy)
-      return UnableToLegalize;
-    ConstantFP &ZeroForNegation =
-        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
-    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
+    auto SignMask =
+        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
     Register SubByReg = MI.getOperand(1).getReg();
-    Register ZeroReg = Zero.getReg(0);
-    MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
+    MIRBuilder.buildXor(Res, SubByReg, SignMask);
     MI.eraseFromParent();
     return Legalized;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 9a71848..83ade2d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -173,6 +173,7 @@ private:
                           SDValue NewIntValue) const;
   SDValue ExpandFCOPYSIGN(SDNode *Node) const;
   SDValue ExpandFABS(SDNode *Node) const;
+  SDValue ExpandFNEG(SDNode *Node) const;
   SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
   void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
                              SmallVectorImpl<SDValue> &Results);
@@ -1573,6 +1574,22 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
   return modifySignAsInt(MagAsInt, DL, CopiedSign);
 }
 
+SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const {
+  // Get an integer value holding the operand's sign bit, plus its sign mask.
+  SDLoc DL(Node);
+  FloatSignAsInt SignAsInt;
+  getSignAsIntValue(SignAsInt, DL, Node->getOperand(0));
+  EVT IntVT = SignAsInt.IntValue.getValueType();
+
+  // Flip the sign by XORing the integer value with the sign mask.
+  SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+  SDValue SignFlip =
+      DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask);
+
+  // Convert the flipped integer back to a floating-point value.
+  return modifySignAsInt(SignAsInt, DL, SignFlip);
+}
+
 SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
   SDLoc DL(Node);
   SDValue Value = Node->getOperand(0);
@@ -3252,12 +3269,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(ExpandFCOPYSIGN(Node));
     break;
   case ISD::FNEG:
-    // Expand Y = FNEG(X) ->  Y = SUB -0.0, X
-    Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
-    // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
-    Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
-                       Node->getOperand(0));
-    Results.push_back(Tmp1);
+    Results.push_back(ExpandFNEG(Node));
     break;
   case ISD::FABS:
     Results.push_back(ExpandFABS(Node));
@@ -3942,10 +3954,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
         return true;
       break;
     case ISD::STRICT_FSUB: {
-      if (TLI.getStrictFPOperationAction(Node->getOpcode(),
-                                         Node->getValueType(0))
-          == TargetLowering::Legal)
+      if (TLI.getStrictFPOperationAction(
+              ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal)
         return true;
+      if (TLI.getStrictFPOperationAction(
+              ISD::STRICT_FADD, Node->getValueType(0)) != TargetLowering::Legal)
+        break;
 
       EVT VT = Node->getValueType(0);
       const SDNodeFlags Flags = Node->getFlags();
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index f365715..d9b60f4 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -88,7 +88,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
 
   getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
       .legalFor({s32})
-      .minScalar(0, s32);
+      .clampScalar(0, s32, s32);
 
   if (ST.hasNEON())
     getActionDefinitionsBuilder({G_ADD, G_SUB})
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 3561d8f..ad53663 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -262,19 +262,17 @@ define void @test_extend() {
 }
 
 define fp128 @test_neg(fp128 %in) {
-; CHECK: [[$MINUS0:.LCPI[0-9]+_0]]:
-; Make sure the weird hex constant below *is* -0.0
-; CHECK-NEXT: fp128 -0
-
 ; CHECK-LABEL: test_neg:
 
-  ; Could in principle be optimized to fneg which we can't select, this makes
-  ; sure that doesn't happen.
+;; We convert this to fneg, and target-independent code expands it with
+;; integer operations.
   %ret = fsub fp128 0xL00000000000000008000000000000000, %in
-; CHECK: mov v1.16b, v0.16b
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:[[$MINUS0]]]
-; CHECK: bl __subtf3
-
   ret fp128 %ret
-; CHECK: ret
+
+; CHECK:      str q0, [sp, #-16]!
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: eor w8, w8, #0x80
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: ret
 }
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir
index dfbbdce..199b72a 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir
@@ -16,14 +16,17 @@
   define void @test_and_s8() { ret void }
   define void @test_and_s16() { ret void }
   define void @test_and_s32() { ret void }
+  define void @test_and_s64() { ret void }
 
   define void @test_or_s8() { ret void }
   define void @test_or_s16() { ret void }
   define void @test_or_s32() { ret void }
+  define void @test_or_s64() { ret void }
 
   define void @test_xor_s8() { ret void }
   define void @test_xor_s16() { ret void }
   define void @test_xor_s32() { ret void }
+  define void @test_xor_s64() { ret void }
 
   define void @test_lshr_s32() { ret void }
   define void @test_ashr_s32() { ret void }
@@ -391,6 +394,41 @@ body:             |
 
 ...
 ---
+name:            test_and_s64
+# CHECK-LABEL: name: test_and_s64
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body:             |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+
+    %0(s32) = COPY $r0
+    %1(s32) = COPY $r1
+    %2(s32) = COPY $r2
+    %3(s32) = COPY $r3
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %6(s64) = G_AND %4, %5
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
+    $r0 = COPY %7(s32)
+    $r1 = COPY %8(s32)
+    BX_RET 14, $noreg, implicit $r0, implicit $r1
+
+...
+---
 name:            test_or_s8
 # CHECK-LABEL: name: test_or_s8
 legalized:       false
@@ -480,6 +518,41 @@ body:             |
 
 ...
 ---
+name:            test_or_s64
+# CHECK-LABEL: name: test_or_s64
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body:             |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+
+    %0(s32) = COPY $r0
+    %1(s32) = COPY $r1
+    %2(s32) = COPY $r2
+    %3(s32) = COPY $r3
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %6(s64) = G_OR %4, %5
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
+    $r0 = COPY %7(s32)
+    $r1 = COPY %8(s32)
+    BX_RET 14, $noreg, implicit $r0, implicit $r1
+
+...
+---
 name:            test_xor_s8
 # CHECK-LABEL: name: test_xor_s8
 legalized:       false
@@ -569,6 +642,41 @@ body:             |
 
 ...
 ---
+name:            test_xor_s64
+# CHECK-LABEL: name: test_xor_s64
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body:             |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+
+    %0(s32) = COPY $r0
+    %1(s32) = COPY $r1
+    %2(s32) = COPY $r2
+    %3(s32) = COPY $r3
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %6(s64) = G_XOR %4, %5
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
+    $r0 = COPY %7(s32)
+    $r1 = COPY %8(s32)
+    BX_RET 14, $noreg, implicit $r0, implicit $r1
+
+...
+---
 name:            test_lshr_s32
 # CHECK-LABEL: name: test_lshr_s32
 legalized:       false
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
index 8b85b45..8038d73 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
@@ -689,16 +689,8 @@ body:             |
     ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY $r0
     %0(s32) = COPY $r0
     ; HARD: [[R:%[0-9]+]]:_(s32) = G_FNEG [[X]]
-    ; SOFT-NOT: G_FNEG
-    ; SOFT-DAG: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
-    ; SOFT: ADJCALLSTACKDOWN
-    ; SOFT-DAG: $r0 = COPY [[ZERO]]
-    ; SOFT-DAG: $r1 = COPY [[X]]
-    ; SOFT-AEABI: BL{{.*}} &__aeabi_fsub, {{.*}}, implicit $r0, implicit $r1, implicit-def $r0
-    ; SOFT-DEFAULT: BL{{.*}} &__subsf3, {{.*}}, implicit $r0, implicit $r1, implicit-def $r0
-    ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY $r0
-    ; SOFT: ADJCALLSTACKUP
-    ; SOFT-NOT: G_FNEG
+    ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; SOFT: [[R:%[0-9]+]]:_(s32) = G_XOR [[X]], [[ZERO]]
     %1(s32) = G_FNEG %0
     ; CHECK: $r0 = COPY [[R]]
     $r0 = COPY %1(s32)
@@ -730,20 +722,14 @@ body:             |
     ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]]
     %2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
     ; HARD: [[R:%[0-9]+]]:_(s64) = G_FNEG [[X]]
-    ; SOFT-NOT: G_FNEG
-    ; SOFT-DAG: [[NEGATIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
-    ; SOFT-DAG: [[POSITIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SOFT: ADJCALLSTACKDOWN
-    ; SOFT-DAG: $r{{[0-1]}} = COPY [[NEGATIVE_ZERO]]
-    ; SOFT-DAG: $r{{[0-1]}} = COPY [[POSITIVE_ZERO]]
-    ; SOFT-DAG: $r{{[2-3]}} = COPY [[X0]]
-    ; SOFT-DAG: $r{{[2-3]}} = COPY [[X1]]
-    ; SOFT-AEABI: BL{{.*}} &__aeabi_dsub, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
-    ; SOFT-DEFAULT: BL{{.*}} &__subdf3, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
-    ; SOFT: ADJCALLSTACKUP
-    ; SOFT-NOT: G_FNEG
+    ; HARD: G_UNMERGE_VALUES [[R]](s64)
+    ; SOFT: [[POSITIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SOFT: [[NEGATIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; SOFT: [[LOWR:%[0-9]+]]:_(s32) = G_XOR [[X0]], [[POSITIVE_ZERO]]
+    ; SOFT: [[HIGHR:%[0-9]+]]:_(s32) = G_XOR [[X1]], [[NEGATIVE_ZERO]]
+    ; SOFT: $r0 = COPY [[LOWR]]
+    ; SOFT: $r1 = COPY [[HIGHR]]
     %3(s64) = G_FNEG %2
-    ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64)
     %4(s32),%5(s32) = G_UNMERGE_VALUES %3(s64)
     $r0 = COPY %4(s32)
     $r1 = COPY %5(s32)
diff --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
index 50d8752..f88242f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
@@ -67,31 +67,20 @@ entry:
 define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
 ; CHECK-LABEL: fneg_float64_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vldr d0, .LCPI2_0
-; CHECK-NEXT:    vmov r2, r3, d9
-; CHECK-NEXT:    vmov r4, r5, d0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dsub
-; CHECK-NEXT:    vmov r2, r3, d8
-; CHECK-NEXT:    vmov d9, r0, r1
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r1, r5
-; CHECK-NEXT:    bl __aeabi_dsub
-; CHECK-NEXT:    vmov d8, r0, r1
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-NEXT:    .p2align 3
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI2_0:
-; CHECK-NEXT:    .long 0 @ double -0
-; CHECK-NEXT:    .long 2147483648
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    vstr d1, [sp]
+; CHECK-NEXT:    ldrb.w r0, [sp, #7]
+; CHECK-NEXT:    vstr d0, [sp, #8]
+; CHECK-NEXT:    ldrb.w r1, [sp, #15]
+; CHECK-NEXT:    eor r0, r0, #128
+; CHECK-NEXT:    strb.w r0, [sp, #7]
+; CHECK-NEXT:    vldr d1, [sp]
+; CHECK-NEXT:    eor r0, r1, #128
+; CHECK-NEXT:    strb.w r0, [sp, #15]
+; CHECK-NEXT:    vldr d0, [sp, #8]
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    bx lr
 entry:
   %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
   ret <2 x double> %0
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir
index dabe3ac..0f7a59b 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-fneg.mir
@@ -22,9 +22,9 @@ body:             |
     liveins:
     ; CHECK-LABEL: name: test_fneg_f32
     ; CHECK: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -0.000000e+00
-    ; CHECK: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[C]], [[DEF]]
-    ; CHECK: $edi = COPY [[FSUB]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[C]]
+    ; CHECK: $edi = COPY [[XOR]](s32)
     %0(s32) = IMPLICIT_DEF
     %1(s32) = G_FNEG %0
     $edi = COPY %1
@@ -39,9 +39,9 @@ body:             |
     liveins:
     ; CHECK-LABEL: name: test_fneg_f64
     ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
-    ; CHECK: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[C]], [[DEF]]
-    ; CHECK: $rdi = COPY [[FSUB]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[C]]
+    ; CHECK: $rdi = COPY [[XOR]](s64)
     %0(s64) = G_IMPLICIT_DEF
     %1(s64) = G_FNEG %0
     $rdi = COPY %1