From 3f77724de7bc84f5f6a89b81de990b2c49f93953 Mon Sep 17 00:00:00 2001 From: Dhruv Chawla <44582521+dc03@users.noreply.github.com> Date: Thu, 22 Jun 2023 22:32:01 +0530 Subject: [PATCH] [TargetLowering] Better code generation for ISD::SADDSAT/SSUBSAT when operand sign is known When the sign of either of the operands is known, it is possible to determine what the saturating value will be without having to compute it using the sign bits. Differential Revision: https://reviews.llvm.org/D153575 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 31 ++++++ .../AArch64/aarch64-saturating-arithmetic.ll | 108 +++++++++------------ llvm/test/CodeGen/X86/combine-add-ssat.ll | 5 +- 3 files changed, 78 insertions(+), 66 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3d213d3..ed8d884 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9746,6 +9746,37 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff); } + if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) { + APInt MinVal = APInt::getSignedMinValue(BitWidth); + APInt MaxVal = APInt::getSignedMaxValue(BitWidth); + + KnownBits KnownLHS = DAG.computeKnownBits(LHS); + KnownBits KnownRHS = DAG.computeKnownBits(RHS); + + // If either of the operand signs are known, then they are guaranteed to + // only saturate in one direction. If non-negative they will saturate + // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN. + // + // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the + // sign of 'y' has to be flipped. + + bool LHSIsNonNegative = KnownLHS.isNonNegative(); + bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() + : KnownRHS.isNegative(); + if (LHSIsNonNegative || RHSIsNonNegative) { + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff); + } + + bool LHSIsNegative = KnownLHS.isNegative(); + bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative() + : KnownRHS.isNonNegative(); + if (LHSIsNegative || RHSIsNegative) { + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff); + } + } + // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff APInt MinVal = APInt::getSignedMinValue(BitWidth); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); diff --git a/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll b/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll index 9c6cdee..06cb134 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll @@ -4,10 +4,9 @@ define i64 @test_ssub_nonneg_rhs(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: subs x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 1) ret i64 %sat @@ -16,10 +15,9 @@ define i64 @test_ssub_nonneg_rhs(i64 %x) { define i64 @test_ssub_neg_rhs(i64 %x) { ; CHECK-LABEL: test_ssub_neg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: adds x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 -1) ret i64 %sat @@ -28,10 +26,9 @@ define i64 @test_ssub_neg_rhs(i64 %x) { define i64 @test_sadd_nonneg_rhs(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: adds x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 1) ret i64 %sat @@ -41,10 +38,9 @@ define i64 @test_sadd_nonneg_rhs(i64 %x) { define i64 @test_sadd_neg_rhs(i64 %x) { ; CHECK-LABEL: test_sadd_neg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: subs x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 -1) ret i64 %sat @@ -54,9 +50,8 @@ define i64 @test_ssub_nonneg_lhs(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_lhs: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov x9, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 1, i64 %x) @@ -67,9 +62,8 @@ define i64 @test_ssub_neg_lhs(i64 %x) { ; CHECK-LABEL: test_ssub_neg_lhs: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov x9, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 -1, i64 %x) @@ -79,10 +73,9 @@ define i64 @test_ssub_neg_lhs(i64 %x) { define i64 @test_sadd_nonneg_lhs(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_lhs: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: adds x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 1, i64 %x) ret i64 %sat @@ -91,10 +84,9 @@ define i64 @test_sadd_nonneg_lhs(i64 %x) { define i64 @test_sadd_neg_lhs(i64 %x) { ; CHECK-LABEL: test_sadd_neg_lhs: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: subs x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 -1, i64 %x) ret i64 %sat @@ -104,10 +96,9 @@ define i64 @test_ssub_nonneg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_rhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #123 // =0x7b +; CHECK-NEXT: mov x9, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: subs x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %y = and i64 %x, 123 @@ -119,11 +110,10 @@ define i64 @test_ssub_neg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_neg_rhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmn x0, #1 -; CHECK-NEXT: csinv x8, x0, xzr, lt -; CHECK-NEXT: subs x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: csinv x9, x0, xzr, lt +; CHECK-NEXT: subs x9, x0, x9 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = call i64 @llvm.smin(i64 %x, i64 -1) %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y) @@ -134,11 +124,10 @@ define i64 @test_sadd_nonneg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_rhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x0, #1 -; CHECK-NEXT: csinc x8, x0, xzr, gt -; CHECK-NEXT: adds x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: csinc x9, x0, xzr, gt +; CHECK-NEXT: adds x9, x0, x9 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = call i64 @llvm.smax(i64 %x, i64 1) %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y) @@ -149,11 +138,10 @@ define i64 @test_sadd_nonneg_rhs_nonconst(i64 %x) { define i64 @test_sadd_neg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_neg_rhs_nonconst: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, #0x8000000000000000 -; CHECK-NEXT: adds x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: orr x9, x0, #0x8000000000000000 +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: adds x9, x0, x9 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = or i64 %x, u0x8000000000000000 %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y) @@ -164,10 +152,9 @@ define i64 @test_ssub_nonneg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_lhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #123 // =0x7b +; CHECK-NEXT: mov x9, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %y = and i64 %x, 123 @@ -179,11 +166,10 @@ define i64 @test_ssub_neg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_neg_lhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmn x0, #1 -; CHECK-NEXT: csinv x8, x0, xzr, lt -; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: csinv x9, x0, xzr, lt +; CHECK-NEXT: subs x9, x9, x0 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = call i64 @llvm.smin(i64 %x, i64 -1) %sat = call i64 @llvm.ssub.sat.i64(i64 %y, i64 %x) @@ -194,11 +180,10 @@ define i64 @test_sadd_nonneg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_lhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x0, #1 -; CHECK-NEXT: csinc x8, x0, xzr, gt -; CHECK-NEXT: adds x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: csinc x9, x0, xzr, gt +; CHECK-NEXT: adds x9, x9, x0 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = call i64 @llvm.smax(i64 %x, i64 1) %sat = call i64 @llvm.sadd.sat.i64(i64 %y, i64 %x) @@ -208,11 +193,10 @@ define i64 @test_sadd_nonneg_lhs_nonconst(i64 %x) { define i64 @test_sadd_neg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_neg_lhs_nonconst: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, #0x8000000000000000 -; CHECK-NEXT: adds x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: orr x9, x0, #0x8000000000000000 +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: adds x9, x9, x0 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = or i64 %x, u0x8000000000000000 %sat = call i64 @llvm.sadd.sat.i64(i64 %y, i64 %x) diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll index 10decfd..3e21798 100644 --- a/llvm/test/CodeGen/X86/combine-add-ssat.ll +++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll @@ -77,11 +77,8 @@ define <8 x i16> @combine_constfold_undef_v8i16() { define i32 @combine_constant_i32(i32 %a0) { ; CHECK-LABEL: combine_constant_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: sarl $31, %eax -; CHECK-NEXT: addl $-2147483648, %eax # imm = 0x80000000 ; CHECK-NEXT: incl %edi +; CHECK-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: cmovnol %edi, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.sadd.sat.i32(i32 1, i32 %a0) -- 2.7.4