[AArch64][SVE] Consider more intrinsics in 'isZeroingInactiveLanes'.

author Sander de Smalen <sander.desmalen@arm.com>
Fri, 15 Jul 2022 12:53:42 +0000 (13:53 +0100)
committer Sander de Smalen <sander.desmalen@arm.com>
Tue, 26 Jul 2022 14:07:41 +0000 (15:07 +0100)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

index bc57afd..7df43c3 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -255,6 +255,12 @@ static bool isZeroingInactiveLanes(SDValue Op) {
        return false;
      case Intrinsic::aarch64_sve_ptrue:
      case Intrinsic::aarch64_sve_pnext:
+    case Intrinsic::aarch64_sve_cmpeq:
+    case Intrinsic::aarch64_sve_cmpne:
+    case Intrinsic::aarch64_sve_cmpge:
+    case Intrinsic::aarch64_sve_cmpgt:
+    case Intrinsic::aarch64_sve_cmphs:
+    case Intrinsic::aarch64_sve_cmphi:
      case Intrinsic::aarch64_sve_cmpeq_wide:
      case Intrinsic::aarch64_sve_cmpne_wide:
      case Intrinsic::aarch64_sve_cmpge_wide:
@@ -265,6 +271,11 @@ static bool isZeroingInactiveLanes(SDValue Op) {
      case Intrinsic::aarch64_sve_cmphi_wide:
      case Intrinsic::aarch64_sve_cmplo_wide:
      case Intrinsic::aarch64_sve_cmpls_wide:
+    case Intrinsic::aarch64_sve_fcmpeq:
+    case Intrinsic::aarch64_sve_fcmpne:
+    case Intrinsic::aarch64_sve_fcmpge:
+    case Intrinsic::aarch64_sve_fcmpgt:
+    case Intrinsic::aarch64_sve_fcmpuo:
        return true;
      }
    }
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll

index 48c1255..d0ea1dd 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
@@ -20,10 +20,7 @@ define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
  define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: cmpeq_nxv4i32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
  ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll

index 77ee75e..5dae689 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
@@ -20,10 +20,7 @@ define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
  define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: cmpge_nxv4i32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
  ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll

index f8ca490..c2dc452 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
@@ -20,10 +20,7 @@ define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
  define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: cmpgt_nxv4i32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
  ; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll

index ff9f627..e4b4592 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
@@ -20,10 +20,7 @@ define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
  define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: cmphi_nxv4i32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
  ; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll

index 7c30aff..42906f6 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
@@ -20,10 +20,7 @@ define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
  define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: cmphs_nxv4i32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
  ; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll

index a95a9a0..ba4bd4b 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
@@ -20,10 +20,7 @@ define i32 @cmpne_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
  define i32 @cmpne_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; CHECK-LABEL: cmpne_nxv4i32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
  ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest.ll b/llvm/test/CodeGen/AArch64/sve-ptest.ll

index 4c4d8b2..ec51488 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest.ll
@@ -7,10 +7,8 @@
  define i32 @fcmpeq_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; CHECK-LABEL: fcmpeq_nxv4f32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpeq.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -22,10 +20,8 @@ define i32 @fcmpeq_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
  define i32 @fcmpne_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; CHECK-LABEL: fcmpne_nxv4f32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    fcmne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpne.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -37,10 +33,8 @@ define i32 @fcmpne_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
  define i32 @fcmpge_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; CHECK-LABEL: fcmpge_nxv4f32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT:    fcmge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpge.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -52,10 +46,8 @@ define i32 @fcmpge_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
  define i32 @fcmpgt_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; CHECK-LABEL: fcmpgt_nxv4f32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpgt.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -67,10 +59,8 @@ define i32 @fcmpgt_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
  define i32 @fcmpuo_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; CHECK-LABEL: fcmpuo_nxv4f32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT:    fcmuo p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    fcmuo p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
  ; CHECK-NEXT:    cset w0, ne
  ; CHECK-NEXT:    ret
    %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpuo.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
author	Sander de Smalen <sander.desmalen@arm.com>
	Fri, 15 Jul 2022 12:53:42 +0000 (13:53 +0100)
committer	Sander de Smalen <sander.desmalen@arm.com>
	Tue, 26 Jul 2022 14:07:41 +0000 (15:07 +0100)
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll		patch | blob | history
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll		patch | blob | history
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll		patch | blob | history
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll		patch | blob | history
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll		patch | blob | history
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll		patch | blob | history
llvm/test/CodeGen/AArch64/sve-ptest.ll		patch | blob | history