[AArch64] Generate FADDP from shuffled fadd

author David Green <david.green@arm.com>

Sat, 11 Jun 2022 13:16:37 +0000 (14:16 +0100)

committer David Green <david.green@arm.com>

Sat, 11 Jun 2022 13:16:37 +0000 (14:16 +0100)
author David Green <david.green@arm.com>
Sat, 11 Jun 2022 13:16:37 +0000 (14:16 +0100)
committer David Green <david.green@arm.com>
Sat, 11 Jun 2022 13:16:37 +0000 (14:16 +0100)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

index f75c5e8..c855d12 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1071,6 +1071,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
      // ADDP custom lowering
      for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
        setOperationAction(ISD::ADD, VT, Custom);
+    // FADDP custom lowering
+    for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
+      setOperationAction(ISD::FADD, VT, Custom);
    }
  
    if (Subtarget->hasSVE()) {
@@ -19317,9 +19320,13 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
  }
  
  static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                               SelectionDAG &DAG) {
+                               SelectionDAG &DAG,
+                               const AArch64Subtarget *Subtarget) {
    EVT VT = N->getValueType(0);
-  if (!VT.is256BitVector())
+  if (!VT.is256BitVector() ||
+      (VT.getScalarType().isFloatingPoint() &&
+       !N->getFlags().hasAllowReassociation()) ||
+      (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()))
      return;
  
    SDValue X = N->getOperand(0);
@@ -19537,7 +19544,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
      Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
      return;
    case ISD::ADD:
-    ReplaceAddWithADDP(N, Results, DAG);
+  case ISD::FADD:
+    ReplaceAddWithADDP(N, Results, DAG, Subtarget);
      return;
  
    case ISD::CTPOP:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td

index 00c0d74..65ca797 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -706,6 +706,9 @@ def AArch64uaddlp   : PatFrags<(ops node:$src),
  def AArch64saddlp   : PatFrags<(ops node:$src),
                                 [(AArch64saddlp_n node:$src),
                                  (int_aarch64_neon_saddlp node:$src)]>;
+def AArch64faddp     : PatFrags<(ops node:$Rn, node:$Rm),
+                                [(AArch64addp_n node:$Rn, node:$Rm),
+                                 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
  
  def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
  def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -4535,7 +4538,7 @@ def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, V
  }
  defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
  defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
+defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
  defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
  defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
  defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
diff --git a/llvm/test/CodeGen/AArch64/faddp-half.ll b/llvm/test/CodeGen/AArch64/faddp-half.ll

index 8413ab0..4cb9832 100644 (file)
--- a/llvm/test/CodeGen/AArch64/faddp-half.ll
+++ b/llvm/test/CodeGen/AArch64/faddp-half.ll
@@ -214,10 +214,9 @@ entry:
  define <16 x half> @addp_v16f16(<16 x half> %a) {
  ; CHECK-LABEL: addp_v16f16:
  ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev32 v2.8h, v0.8h
-; CHECK-NEXT:    rev32 v3.8h, v1.8h
-; CHECK-NEXT:    fadd v0.8h, v2.8h, v0.8h
-; CHECK-NEXT:    fadd v1.8h, v3.8h, v1.8h
+; CHECK-NEXT:    faddp v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    zip1 v0.8h, v1.8h, v1.8h
+; CHECK-NEXT:    zip2 v1.8h, v1.8h, v1.8h
  ; CHECK-NEXT:    ret
  ;
  ; CHECKNOFP16-LABEL: addp_v16f16:
diff --git a/llvm/test/CodeGen/AArch64/faddp.ll b/llvm/test/CodeGen/AArch64/faddp.ll

index 3802b2f..2da2e8f 100644 (file)
--- a/llvm/test/CodeGen/AArch64/faddp.ll
+++ b/llvm/test/CodeGen/AArch64/faddp.ll
@@ -191,10 +191,9 @@ entry:
  define <4 x double> @addp_v4f64(<4 x double> %a) {
  ; CHECK-LABEL: addp_v4f64:
  ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT:    fadd v0.2d, v2.2d, v0.2d
-; CHECK-NEXT:    fadd v1.2d, v3.2d, v1.2d
+; CHECK-NEXT:    faddp v1.2d, v0.2d, v1.2d
+; CHECK-NEXT:    dup v0.2d, v1.d[0]
+; CHECK-NEXT:    dup v1.2d, v1.d[1]
  ; CHECK-NEXT:    ret
  entry:
    %s = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
@@ -231,10 +230,9 @@ entry:
  define <8 x float> @addp_v8f32_slow(<8 x float> %a) {
  ; CHECK-LABEL: addp_v8f32_slow:
  ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev64 v2.4s, v0.4s
-; CHECK-NEXT:    rev64 v3.4s, v1.4s
-; CHECK-NEXT:    fadd v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    fadd v1.4s, v3.4s, v1.4s
+; CHECK-NEXT:    faddp v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    zip1 v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    zip2 v1.4s, v1.4s, v1.4s
  ; CHECK-NEXT:    ret
  entry:
    %s = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -245,14 +243,12 @@ entry:
  define <16 x float> @addp_v16f32(<16 x float> %a) {
  ; CHECK-LABEL: addp_v16f32:
  ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev64 v4.4s, v0.4s
-; CHECK-NEXT:    rev64 v5.4s, v1.4s
-; CHECK-NEXT:    rev64 v6.4s, v2.4s
-; CHECK-NEXT:    rev64 v7.4s, v3.4s
-; CHECK-NEXT:    fadd v0.4s, v4.4s, v0.4s
-; CHECK-NEXT:    fadd v1.4s, v5.4s, v1.4s
-; CHECK-NEXT:    fadd v2.4s, v6.4s, v2.4s
-; CHECK-NEXT:    fadd v3.4s, v7.4s, v3.4s
+; CHECK-NEXT:    faddp v3.4s, v2.4s, v3.4s
+; CHECK-NEXT:    faddp v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    zip1 v2.4s, v3.4s, v3.4s
+; CHECK-NEXT:    zip1 v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    zip2 v1.4s, v1.4s, v1.4s
+; CHECK-NEXT:    zip2 v3.4s, v3.4s, v3.4s
  ; CHECK-NEXT:    ret
  entry:
    %s = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
author	David Green <david.green@arm.com>
	Sat, 11 Jun 2022 13:16:37 +0000 (14:16 +0100)
committer	David Green <david.green@arm.com>
	Sat, 11 Jun 2022 13:16:37 +0000 (14:16 +0100)
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64InstrInfo.td		patch \| blob \| history
llvm/test/CodeGen/AArch64/faddp-half.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/faddp.ll		patch \| blob \| history