From cb04ba032f573fe75fce0e813ba11b0d47f1159e Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sun, 24 Feb 2019 15:31:27 +0000
Subject: [PATCH] [CGP] add special-cases to form unsigned add with overflow
 (PR40486)

There's likely a missed IR canonicalization for at least 1 of these
patterns. Otherwise, we wouldn't have needed the pattern-matching
enhancement in D57516.

Note that -- unlike usubo added with D57789 -- the TLI hook for this
transform defaults to 'on'. So if there's any perf fallout from this,
targets should look at how they're lowering the uaddo node in SDAG
and/or override that hook.

The x86 diffs suggest that there's some missing pattern-matching for
forming inc/dec.

This should fix the remaining known problems in:
https://bugs.llvm.org/show_bug.cgi?id=40486
https://bugs.llvm.org/show_bug.cgi?id=31754

llvm-svn: 354746
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp               | 35 ++++++++++++++++-----
 llvm/test/CodeGen/AArch64/uaddo.ll                | 26 ++++++----------
 llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll    | 27 ++++++----------
 llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll         | 11 +++----
 .../CodeGenPrepare/X86/overflow-intrinsics.ll     | 36 ++++++++++++----------
 5 files changed, 71 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index a6a4ea2..e56bd03 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1197,6 +1197,31 @@ static bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
   return true;
 }
 
+/// Match special-case patterns that check for unsigned add overflow.
+static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
+                                                   BinaryOperator *&Add) {
+  // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
+  // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
+  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
+    B = ConstantInt::get(B->getType(), 1);
+  else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
+    B = ConstantInt::get(B->getType(), -1);
+  else
+    return false;
+
+  // Check the users of the variable operand of the compare looking for an add
+  // with the adjusted constant.
+  for (User *U : A->users()) {
+    if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
+      Add = cast<BinaryOperator>(U);
+      return true;
+    }
+  }
+  return false;
+}
+
 /// Try to combine the compare into a call to the llvm.uadd.with.overflow
 /// intrinsic. Return true if any changes were made.
 static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
@@ -1204,7 +1229,8 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
   Value *A, *B;
   BinaryOperator *Add;
   if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
-    return false;
+    if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
+      return false;
 
   if (!TLI.shouldFormOverflowOp(ISD::UADDO,
                                 TLI.getValueType(DL, Add->getType())))
@@ -1216,13 +1242,6 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
   if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
     return false;
 
-#ifndef NDEBUG
-  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
-  // for now:
-  if (Add->hasOneUse())
-    assert(*Add->user_begin() == Cmp && "expected!");
-#endif
-
   if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
     return false;
 
diff --git a/llvm/test/CodeGen/AArch64/uaddo.ll b/llvm/test/CodeGen/AArch64/uaddo.ll
index 981528af..cb34de5 100644
--- a/llvm/test/CodeGen/AArch64/uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/uaddo.ll
@@ -8,9 +8,8 @@
 define i1 @uaddo_i64_increment_alt(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_increment_alt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmn x0, #1 // =1
-; CHECK-NEXT:    add x8, x0, #1 // =1
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    adds x8, x0, #1 // =1
+; CHECK-NEXT:    cset w0, hs
 ; CHECK-NEXT:    str x8, [x1]
 ; CHECK-NEXT:    ret
   %a = add i64 %x, 1
@@ -24,11 +23,9 @@
 define i1 @uaddo_i64_increment_alt_dom(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_increment_alt_dom:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmn x0, #1 // =1
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    add x9, x0, #1 // =1
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    str x9, [x1]
+; CHECK-NEXT:    adds x8, x0, #1 // =1
+; CHECK-NEXT:    cset w0, hs
+; CHECK-NEXT:    str x8, [x1]
 ; CHECK-NEXT:    ret
   %ov = icmp eq i64 %x, -1
   %a = add i64 %x, 1
@@ -41,9 +38,8 @@
 define i1 @uaddo_i64_decrement_alt(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_decrement_alt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x0, #0 // =0
-; CHECK-NEXT:    sub x8, x0, #1 // =1
-; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    subs x8, x0, #1 // =1
+; CHECK-NEXT:    cset w0, hs
 ; CHECK-NEXT:    str x8, [x1]
 ; CHECK-NEXT:    ret
   %a = add i64 %x, -1
@@ -57,11 +53,9 @@
 define i1 @uaddo_i64_decrement_alt_dom(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_decrement_alt_dom:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x0, #0 // =0
-; CHECK-NEXT:    cset w8, ne
-; CHECK-NEXT:    sub x9, x0, #1 // =1
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    str x9, [x1]
+; CHECK-NEXT:    subs x8, x0, #1 // =1
+; CHECK-NEXT:    cset w0, hs
+; CHECK-NEXT:    str x8, [x1]
 ; CHECK-NEXT:    ret
   %ov = icmp ne i64 %x, 0
   %a = add i64 %x, -1
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll
index 921f0da..2c4435b 100644
--- a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll
+++ b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll
@@ -229,12 +229,7 @@ define void @test_18446744073709551614(i64*, i64*) {
 define void @test_18446744073709551615(i64*, i64*) {
 ; CHECK-LABEL: test_18446744073709551615:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    leaq -1(%rax), %rcx
-; CHECK-NEXT:    movq %rcx, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    addb $-1, %al
+; CHECK-NEXT:    addq $-1, (%rdi)
 ; CHECK-NEXT:    adcq $0, (%rsi)
 ; CHECK-NEXT:    retq
   %3 = load i64, i64* %0, align 8
@@ -272,10 +267,9 @@ define i1 @illegal_type(i17 %x, i17* %p) {
 define i1 @uaddo_i64_increment_alt(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_increment_alt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq 1(%rdi), %rax
-; CHECK-NEXT:    movq %rax, (%rsi)
-; CHECK-NEXT:    cmpq $-1, %rdi
+; CHECK-NEXT:    incq %rdi
 ; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    movq %rdi, (%rsi)
 ; CHECK-NEXT:    retq
   %a = add i64 %x, 1
   store i64 %a, i64* %p
@@ -288,9 +282,8 @@
 define i1 @uaddo_i64_increment_alt_dom(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_increment_alt_dom:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpq $-1, %rdi
-; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    movq %rdi, (%rsi)
 ; CHECK-NEXT:    retq
   %ov = icmp eq i64 %x, -1
@@ -304,10 +297,9 @@
 define i1 @uaddo_i64_decrement_alt(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_decrement_alt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq -1(%rdi), %rax
-; CHECK-NEXT:    movq %rax, (%rsi)
-; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    addq $-1, %rdi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    movq %rdi, (%rsi)
 ; CHECK-NEXT:    retq
   %a = add i64 %x, -1
   store i64 %a, i64* %p
@@ -320,9 +312,8 @@
 define i1 @uaddo_i64_decrement_alt_dom(i64 %x, i64* %p) {
 ; CHECK-LABEL: uaddo_i64_decrement_alt_dom:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    decq %rdi
+; CHECK-NEXT:    addq $-1, %rdi
+; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    movq %rdi, (%rsi)
 ; CHECK-NEXT:    retq
   %ov = icmp ne i64 %x, 0
diff --git a/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll b/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll
index 410d736..a2fc94d 100644
--- a/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -68,13 +68,12 @@ define i32 @test2() nounwind uwtable ssp {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movq {{.*}}(%rip), %rax
-; CHECK-NEXT:    leaq -1(%rax), %rsi
+; CHECK-NEXT:    movq {{.*}}(%rip), %rsi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    addq $-1, %rsi
+; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    movq %rsi, {{.*}}(%rip)
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    setne %cl
-; CHECK-NEXT:    movl %ecx, {{.*}}(%rip)
+; CHECK-NEXT:    movl %eax, {{.*}}(%rip)
 ; CHECK-NEXT:    movl $.L.str, %edi
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    callq printf
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
index 177aee7..0e75171 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -162,10 +162,11 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
 
 define i1 @uaddo_i64_increment_alt(i64 %x, i64* %p) {
 ; CHECK-LABEL: @uaddo_i64_increment_alt(
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[X:%.*]], 1
-; CHECK-NEXT:    store i64 [[A]], i64* [[P:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp eq i64 [[X]], -1
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %a = add i64 %x, 1
   store i64 %a, i64* %p
@@ -177,10 +178,11 @@
 
 define i1 @uaddo_i64_increment_alt_dom(i64 %x, i64* %p) {
 ; CHECK-LABEL: @uaddo_i64_increment_alt_dom(
-; CHECK-NEXT:    [[OV:%.*]] = icmp eq i64 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[X]], 1
-; CHECK-NEXT:    store i64 [[A]], i64* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %ov = icmp eq i64 %x, -1
   %a = add i64 %x, 1
@@ -192,10 +194,11 @@
 
 define i1 @uaddo_i64_decrement_alt(i64 %x, i64* %p) {
 ; CHECK-LABEL: @uaddo_i64_decrement_alt(
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[X:%.*]], -1
-; CHECK-NEXT:    store i64 [[A]], i64* [[P:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %a = add i64 %x, -1
   store i64 %a, i64* %p
@@ -207,10 +210,11 @@
 
 define i1 @uaddo_i64_decrement_alt_dom(i64 %x, i64* %p) {
 ; CHECK-LABEL: @uaddo_i64_decrement_alt_dom(
-; CHECK-NEXT:    [[OV:%.*]] = icmp ne i64 [[X:%.*]], 0
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[X]], -1
-; CHECK-NEXT:    store i64 [[A]], i64* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %ov = icmp ne i64 %x, 0
   %a = add i64 %x, -1
-- 
2.7.4
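
P.S. For anyone who wants to poke at the transform locally, here is a minimal
repro sketch. The function names and the exact invocation are mine, not part
of the patch; the bodies mirror the overflow-intrinsics.ll tests above. On a
target whose TLI.shouldFormOverflowOp() hook returns true for ISD::UADDO (the
default after this change), running something like
"opt -S -codegenprepare -mtriple=x86_64--" over this file should rewrite both
compares into the i1 result of @llvm.uadd.with.overflow.i64:

; increment case: "add %x, 1" wraps iff %x is the max value ("icmp eq %x, -1").
define i1 @inc_may_overflow(i64 %x, i64* %p) {
  %a = add i64 %x, 1
  store i64 %a, i64* %p
  %ov = icmp eq i64 %x, -1
  ret i1 %ov
}

; decrement case: "add %x, -1" carries out iff %x is non-zero ("icmp ne %x, 0").
define i1 @dec_may_overflow(i64 %x, i64* %p) {
  %a = add i64 %x, -1
  store i64 %a, i64* %p
  %ov = icmp ne i64 %x, 0
  ret i1 %ov
}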