[DAG] PromoteIntRes_ADDSUBSHLSAT - promote ISD::UADDSAT as clamped add

author Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 16 Feb 2021 17:37:32 +0000 (17:37 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 16 Feb 2021 17:37:44 +0000 (17:37 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 16 Feb 2021 17:37:32 +0000 (17:37 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 16 Feb 2021 17:37:44 +0000 (17:37 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

index deeddb4..52f326c 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -784,6 +784,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
    EVT PromotedType = Op1Promoted.getValueType();
    unsigned NewBits = PromotedType.getScalarSizeInBits();
  
+  if (Opcode == ISD::UADDSAT) {
+    APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+    SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+    SDValue Add =
+        DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+    return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+  }
+
    // USUBSAT can always be promoted as long as we have zero-extended the args.
    if (Opcode == ISD::USUBSAT)
      return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
@@ -799,7 +807,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
      case ISD::SSHLSAT:
        ShiftOp = ISD::SRA;
        break;
-    case ISD::UADDSAT:
      case ISD::USHLSAT:
        ShiftOp = ISD::SRL;
        break;
@@ -822,14 +829,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
      return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
    }
  
-  if (Opcode == ISD::UADDSAT) {
-    APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
-    SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
-    SDValue Add =
-        DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
-    return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
-  }
-
    unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
    APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
    APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll

index 43a32b4..21427a6 100644 (file)
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -116,22 +116,21 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
  ; CHECK-NEXT:    ldrb w9, [x1]
  ; CHECK-NEXT:    ldrb w10, [x0, #1]
  ; CHECK-NEXT:    ldrb w11, [x1, #1]
+; CHECK-NEXT:    ldrb w12, [x0, #2]
  ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ldrb w8, [x1, #2]
  ; CHECK-NEXT:    fmov s1, w9
-; CHECK-NEXT:    ldrb w8, [x0, #2]
-; CHECK-NEXT:    ldrb w9, [x1, #2]
  ; CHECK-NEXT:    mov v0.h[1], w10
+; CHECK-NEXT:    ldrb w9, [x0, #3]
+; CHECK-NEXT:    ldrb w10, [x1, #3]
  ; CHECK-NEXT:    mov v1.h[1], w11
-; CHECK-NEXT:    ldrb w10, [x0, #3]
-; CHECK-NEXT:    ldrb w11, [x1, #3]
-; CHECK-NEXT:    mov v0.h[2], w8
-; CHECK-NEXT:    mov v1.h[2], w9
-; CHECK-NEXT:    mov v0.h[3], w10
-; CHECK-NEXT:    mov v1.h[3], w11
-; CHECK-NEXT:    shl v1.4h, v1.4h, #8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-NEXT:    uqadd v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ushr v0.4h, v0.4h, #8
+; CHECK-NEXT:    mov v0.h[2], w12
+; CHECK-NEXT:    mov v1.h[2], w8
+; CHECK-NEXT:    mov v0.h[3], w9
+; CHECK-NEXT:    mov v1.h[3], w10
+; CHECK-NEXT:    movi d2, #0xff00ff00ff00ff
+; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    umin v0.4h, v0.4h, v2.4h
  ; CHECK-NEXT:    xtn v0.8b, v0.8h
  ; CHECK-NEXT:    str s0, [x2]
  ; CHECK-NEXT:    ret
@@ -150,13 +149,12 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
  ; CHECK-NEXT:    ldrb w10, [x0, #1]
  ; CHECK-NEXT:    ldrb w11, [x1, #1]
  ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    fmov s2, w9
  ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    mov v1.s[1], w11
-; CHECK-NEXT:    shl v1.2s, v1.2s, #24
-; CHECK-NEXT:    shl v0.2s, v0.2s, #24
-; CHECK-NEXT:    uqadd v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #24
+; CHECK-NEXT:    mov v2.s[1], w11
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    add v0.2s, v0.2s, v2.2s
+; CHECK-NEXT:    umin v0.2s, v0.2s, v1.2s
  ; CHECK-NEXT:    mov w8, v0.s[1]
  ; CHECK-NEXT:    fmov w9, s0
  ; CHECK-NEXT:    strb w9, [x2]
@@ -192,13 +190,12 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
  ; CHECK-NEXT:    ldrh w10, [x0, #2]
  ; CHECK-NEXT:    ldrh w11, [x1, #2]
  ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    fmov s2, w9
  ; CHECK-NEXT:    mov v0.s[1], w10
-; CHECK-NEXT:    mov v1.s[1], w11
-; CHECK-NEXT:    shl v1.2s, v1.2s, #16
-; CHECK-NEXT:    shl v0.2s, v0.2s, #16
-; CHECK-NEXT:    uqadd v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #16
+; CHECK-NEXT:    mov v2.s[1], w11
+; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-NEXT:    add v0.2s, v0.2s, v2.2s
+; CHECK-NEXT:    umin v0.2s, v0.2s, v1.2s
  ; CHECK-NEXT:    mov w8, v0.s[1]
  ; CHECK-NEXT:    fmov w9, s0
  ; CHECK-NEXT:    strh w9, [x2]
@@ -271,12 +268,10 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
  ; CHECK-LABEL: v16i4:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    movi v2.16b, #15
-; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
  ; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    shl v1.16b, v1.16b, #4
-; CHECK-NEXT:    shl v0.16b, v0.16b, #4
-; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    ushr v0.16b, v0.16b, #4
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
  ; CHECK-NEXT:    ret
    %z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
    ret <16 x i4> %z
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll

index 5447d5e..5082772 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -17,19 +17,15 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
  ; GFX8-LABEL: v_uaddsat_i8:
  ; GFX8:       ; %bb.0:
  ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
-; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
+; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
+; GFX8-NEXT:    v_min_u16_e32 v0, 0xff, v0
  ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  ;
  ; GFX9-LABEL: v_uaddsat_i8:
  ; GFX9:       ; %bb.0:
  ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
-; GFX9-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
+; GFX9-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
+; GFX9-NEXT:    v_min_u16_e32 v0, 0xff, v0
  ; GFX9-NEXT:    s_setpc_b64 s[30:31]
    %result = call i8 @llvm.uadd.sat.i8(i8 %lhs, i8 %rhs)
    ret i8 %result
diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll

index 5624dfd..d6d0903 100644 (file)
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -481,26 +481,20 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
  define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
  ; SSE-LABEL: v16i4:
  ; SSE:       # %bb.0:
-; SSE-NEXT:    psllw $4, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
  ; SSE-NEXT:    pand %xmm2, %xmm1
-; SSE-NEXT:    psllw $4, %xmm0
  ; SSE-NEXT:    pand %xmm2, %xmm0
-; SSE-NEXT:    paddusb %xmm1, %xmm0
-; SSE-NEXT:    psrlw $4, %xmm0
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE-NEXT:    paddb %xmm1, %xmm0
+; SSE-NEXT:    pminub %xmm2, %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: v16i4:
  ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsllw $4, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
  ; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
  ; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
-; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
  ; AVX-NEXT:    retq
    %z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
    ret <16 x i4> %z
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 16 Feb 2021 17:37:32 +0000 (17:37 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 16 Feb 2021 17:37:44 +0000 (17:37 +0000)
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/uadd_sat_vec.ll		patch | blob | history
llvm/test/CodeGen/AMDGPU/uaddsat.ll		patch | blob | history
llvm/test/CodeGen/X86/uadd_sat_vec.ll		patch | blob | history