From fef221bf1fb6fc941db9cd1d0ce926f15cdb1c14 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 13 Apr 2022 12:53:15 +0100
Subject: [PATCH] [DAG] Enable SimplifyVBinOp folds on add/sub sat intrinsics

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
 llvm/test/CodeGen/X86/combine-add-ssat.ll     | 8 +++-----
 llvm/test/CodeGen/X86/combine-add-usat.ll     | 8 +++-----
 llvm/test/CodeGen/X86/combine-sub-ssat.ll     | 6 ++----
 llvm/test/CodeGen/X86/combine-sub-usat.ll     | 6 ++----
 5 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 700a66c..491d29a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2674,7 +2674,8 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
 
   // fold vector ops
   if (VT.isVector()) {
-    // TODO SimplifyVBinOp
+    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+      return FoldedVOp;
 
     // fold (add_sat x, 0) -> x, vector edition
     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
@@ -3748,7 +3749,8 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
 
   // fold vector ops
   if (VT.isVector()) {
-    // TODO SimplifyVBinOp
+    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+      return FoldedVOp;
 
     // fold (sub_sat x, 0) -> x, vector edition
     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll
index 2da2622..4f9ab35 100644
--- a/llvm/test/CodeGen/X86/combine-add-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll
@@ -164,16 +164,14 @@ define <8 x i16> @combine_no_overflow_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
 define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
 ; SSE-LABEL: combine_shuffle_shuffle_v8i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[3,2,1,0,4,5,6,7]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[3,2,1,0,4,5,6,7]
-; SSE-NEXT:    paddsw %xmm2, %xmm0
+; SSE-NEXT:    paddsw %xmm1, %xmm0
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_shuffle_shuffle_v8i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    retq
   %x1 = shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/X86/combine-add-usat.ll b/llvm/test/CodeGen/X86/combine-add-usat.ll
index 1283a14..13bc3b2 100644
--- a/llvm/test/CodeGen/X86/combine-add-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-usat.ll
@@ -157,16 +157,14 @@ define <8 x i16> @combine_no_overflow_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
 define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
 ; SSE-LABEL: combine_shuffle_shuffle_v8i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[3,2,1,0,4,5,6,7]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[3,2,1,0,4,5,6,7]
-; SSE-NEXT:    paddusw %xmm2, %xmm0
+; SSE-NEXT:    paddusw %xmm1, %xmm0
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_shuffle_shuffle_v8i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    retq
   %x1 = shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/X86/combine-sub-ssat.ll b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
index 812d1b9..b1dcd56 100644
--- a/llvm/test/CodeGen/X86/combine-sub-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
@@ -119,16 +119,14 @@ define <8 x i16> @combine_self_v8i16(<8 x i16> %a0) {
 define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
 ; SSE-LABEL: combine_shuffle_shuffle_v8i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; SSE-NEXT:    psubsw %xmm1, %xmm0
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_shuffle_shuffle_v8i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    retq
   %x1 = shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll
index 594153a..bda3065 100644
--- a/llvm/test/CodeGen/X86/combine-sub-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll
@@ -249,16 +249,14 @@ define <8 x i16> @combine_trunc_v8i32_v8i16(<8 x i16> %a0, <8 x i32> %a1) {
 define <8 x i16> @combine_shuffle_shuffle_v8i16(<8 x i16> %x0, <8 x i16> %y0) {
 ; SSE-LABEL: combine_shuffle_shuffle_v8i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; SSE-NEXT:    psubusw %xmm1, %xmm0
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_shuffle_shuffle_v8i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; AVX-NEXT:    retq
   %x1 = shufflevector <8 x i16> %x0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   %y1 = shufflevector <8 x i16> %y0, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
-- 
2.7.4
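
Note (not part of the patch): the test diffs above exercise the generic fold that
SimplifyVBinOp now applies to the saturating intrinsics, roughly
add_sat(shuffle(x, M), shuffle(y, M)) -> shuffle(add_sat(x, y), M), trading two
shuffles for one. The scalar sketch below is a sanity check of that lane-wise
identity only; the helper names add_sat_i16 and apply_mask are hypothetical and
do not appear in LLVM.

#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <limits>

using V8 = std::array<int16_t, 8>;

// Scalar model of one PADDSW lane: 16-bit signed saturating add.
static int16_t add_sat_i16(int16_t a, int16_t b) {
  int32_t s = int32_t(a) + int32_t(b);
  s = std::clamp(s, int32_t(std::numeric_limits<int16_t>::min()),
                 int32_t(std::numeric_limits<int16_t>::max()));
  return int16_t(s);
}

// Scalar model of a unary shufflevector: permute lanes by `mask`.
static V8 apply_mask(const V8 &v, const std::array<int, 8> &mask) {
  V8 r{};
  for (int i = 0; i != 8; ++i)
    r[i] = v[mask[i]];
  return r;
}

int main() {
  // Same lane permutation the pshuflw in the tests performs.
  const std::array<int, 8> mask{3, 2, 1, 0, 4, 5, 6, 7};
  const V8 x{32000, -32000, 7, -7, 1, 2, 3, 4};
  const V8 y{1000, -1000, 7, -7, 4, 3, 2, 1};

  // Before the fold: shuffle both operands, then saturating-add.
  const V8 xs = apply_mask(x, mask), ys = apply_mask(y, mask);
  V8 pre{};
  for (int i = 0; i != 8; ++i)
    pre[i] = add_sat_i16(xs[i], ys[i]);

  // After the fold: saturating-add first, then shuffle the result once.
  V8 sum{};
  for (int i = 0; i != 8; ++i)
    sum[i] = add_sat_i16(x[i], y[i]);
  const V8 post = apply_mask(sum, mask);

  assert(pre == post && "shuffle/add_sat fold must preserve results");
  std::puts("add_sat(shuffle(x), shuffle(y)) == shuffle(add_sat(x, y))");
}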