    break;
  case ISD::INTRINSIC_W_CHAIN:
-    // For little endian, VSX loads require generating lxvd2x/xxswapd.
-    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
-    if (Subtarget.needsSwapsForVSXMemOps()) {
-      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
-      default:
-        break;
-      case Intrinsic::ppc_vsx_lxvw4x:
-      case Intrinsic::ppc_vsx_lxvd2x:
-        return expandVSXLoadForLE(N, DCI);
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    default:
+      break;
+    case Intrinsic::ppc_altivec_vsum4sbs:
+    case Intrinsic::ppc_altivec_vsum4shs:
+    case Intrinsic::ppc_altivec_vsum4ubs: {
+      // These sum-across intrinsics only have a chain due to the side effect
+      // that they may set the SAT bit. If we know the SAT bit will not be set
+      // for some inputs, we can replace any uses of their chain with the input
+      // chain.
+      if (BuildVectorSDNode *BVN =
+              dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
+        APInt APSplatBits, APSplatUndef;
+        unsigned SplatBitSize;
+        bool HasAnyUndefs;
+        bool BVNIsConstantSplat = BVN->isConstantSplat(
+            APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
+            !Subtarget.isLittleEndian());
+        // If the constant splat vector is 0, the SAT bit will not be set.
+        if (BVNIsConstantSplat && APSplatBits == 0)
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
      }
+      return SDValue();
+    }
+    case Intrinsic::ppc_vsx_lxvw4x:
+    case Intrinsic::ppc_vsx_lxvd2x:
+      // For little endian, VSX loads require generating lxvd2x/xxswapd.
+      // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
+      if (Subtarget.needsSwapsForVSXMemOps())
+        return expandVSXLoadForLE(N, DCI);
+      break;
    }
    break;
  case ISD::INTRINSIC_VOID:
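
The combine above rests on a simple numeric fact: when the accumulator operand (operand 3 of the intrinsic node) is a splat of zero, none of these sum-across operations can overflow a 32-bit lane, so the SAT bit can never be set. The following is a minimal standalone sketch of that worst-case arithmetic (plain C++ for illustration only, not part of the patch):

// Illustration only: with a zero accumulator, each 32-bit result lane is just
// the sum of four bytes (vsum4sbs/vsum4ubs) or two halfwords (vsum4shs), and
// the worst-case totals all fit in the 32-bit destination, so no saturation
// (and therefore no SAT bit update) can occur.
#include <cassert>
#include <cstdint>

int main() {
  // vsum4sbs: four signed bytes per lane, plus a zero accumulator word.
  assert(4 * int64_t(INT8_MIN) >= INT32_MIN && 4 * int64_t(INT8_MAX) <= INT32_MAX);
  // vsum4shs: two signed halfwords per lane, plus a zero accumulator word.
  assert(2 * int64_t(INT16_MIN) >= INT32_MIN && 2 * int64_t(INT16_MAX) <= INT32_MAX);
  // vsum4ubs: four unsigned bytes per lane, plus a zero accumulator word.
  assert(4 * uint64_t(UINT8_MAX) <= UINT32_MAX);
  return 0;
}

The regression-test updates below show the same effect from the codegen side: an otherwise-unused vsum4sbs/vsum4shs/vsum4ubs with a zeroinitializer accumulator now disappears entirely (test1-test3), and a duplicated call whose result is unused no longer survives merely because its chain result was still referenced (test10-test12).
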
define void @test1(<16 x i8> %0) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v3, v3, v3
-; CHECK-NEXT: vsum4sbs v2, v2, v3
; CHECK-NEXT: blr
entry:
  %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4sbs(<16 x i8> %0, <4 x i32> zeroinitializer)

define void @test2(<8 x i16> %0) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v3, v3, v3
-; CHECK-NEXT: vsum4shs v2, v2, v3
; CHECK-NEXT: blr
entry:
  %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4shs(<8 x i16> %0, <4 x i32> zeroinitializer)

define void @test3(<16 x i8> %0) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v3, v3, v3
-; CHECK-NEXT: vsum4ubs v2, v2, v3
; CHECK-NEXT: blr
entry:
  %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4ubs(<16 x i8> %0, <4 x i32> zeroinitializer)

define <4 x i32> @test10(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v4, v4, v4
-; CHECK-NEXT: vsum4sbs v2, v2, v4
-; CHECK-NEXT: vsum4sbs v3, v3, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: vsum4sbs v2, v2, v3
; CHECK-NEXT: blr
entry:
  %2 = tail call <4 x i32> @llvm.ppc.altivec.vsum4sbs(<16 x i8> %0, <4 x i32> zeroinitializer)

define <4 x i32> @test11(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v4, v4, v4
-; CHECK-NEXT: vsum4shs v2, v2, v4
-; CHECK-NEXT: vsum4shs v3, v3, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: vsum4shs v2, v2, v3
; CHECK-NEXT: blr
entry:
  %2 = tail call <4 x i32> @llvm.ppc.altivec.vsum4shs(<8 x i16> %0, <4 x i32> zeroinitializer)

define <4 x i32> @test12(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v4, v4, v4
-; CHECK-NEXT: vsum4ubs v2, v2, v4
-; CHECK-NEXT: vsum4ubs v3, v3, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: vsum4ubs v2, v2, v3
; CHECK-NEXT: blr
entry:
  %2 = tail call <4 x i32> @llvm.ppc.altivec.vsum4ubs(<16 x i8> %0, <4 x i32> zeroinitializer)