From e8bf0f79bdb60b30b00e15712b786eb5ce990c7a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 Feb 2019 15:18:49 +0000 Subject: [PATCH] [InstCombine] canonicalize more unsigned saturated add with 'not' Yet another pattern variation suggested by: https://bugs.llvm.org/show_bug.cgi?id=14613 There are 8 more potential commuted patterns here on top of the 8 that were already handled (rL354221, rL354276, rL354393). We have the obvious commute of the 'add' + commute of the cmp predicate/operands (ugt/ult) + commute of the select operands: Name: base %notx = xor i32 %x, -1 %a = add i32 %notx, %y %c = icmp ult i32 %x, %y %r = select i1 %c, i32 -1, i32 %a => %c2 = icmp ult i32 %a, %y %r = select i1 %c2, i32 -1, i32 %a Name: ugt %notx = xor i32 %x, -1 %a = add i32 %notx, %y %c = icmp ugt i32 %y, %x %r = select i1 %c, i32 -1, i32 %a => %c2 = icmp ult i32 %a, %y %r = select i1 %c2, i32 -1, i32 %a Name: commute select %notx = xor i32 %x, -1 %a = add i32 %notx, %y %c = icmp ult i32 %y, %x %r = select i1 %c, i32 %a, i32 -1 => %c2 = icmp ult i32 %a, %y %r = select i1 %c2, i32 -1, i32 %a Name: ugt + commute select %notx = xor i32 %x, -1 %a = add i32 %notx, %y %c = icmp ugt i32 %x, %y %r = select i1 %c, i32 %a, i32 -1 => %c2 = icmp ult i32 %a, %y %r = select i1 %c2, i32 -1, i32 %a https://rise4fun.com/Alive/den llvm-svn: 354887 --- .../Transforms/InstCombine/InstCombineSelect.cpp | 11 +++++ .../Transforms/InstCombine/saturating-add-sub.ll | 48 +++++++++++----------- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index d153c84..79835ef 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -725,6 +725,17 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y); return 
Builder.CreateSelect(NewCmp, TVal, FVal); } + // The 'not' op may be included in the sum but not the compare. + X = Cmp0; + Y = Cmp1; + if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) { + // Change the comparison to use the sum (false value of the select). That is + // a canonical pattern match form for uadd.with.overflow: + // (X u< Y) ? -1 : (~X + Y) --> ((~X + Y) u< Y) ? -1 : (~X + Y) + // (X u< Y) ? -1 : (Y + ~X) --> ((Y + ~X) u< Y) ? -1 : (Y + ~X) + Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y); + return Builder.CreateSelect(NewCmp, TVal, FVal); + } return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index 8e17148..5c88ef3 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -803,9 +803,9 @@ define i32 @uadd_sat_not(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 ; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %a = add i32 %notx, %y @@ -820,9 +820,9 @@ define i32 @uadd_sat_not_commute_add(i32 %xp, i32 %yp) { ; CHECK-NEXT: [[Y:%.*]] = urem i32 42, [[YP:%.*]] ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1 ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[Y]], [[NOTX]] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %x = srem i32 42, %xp ; thwart 
complexity-based-canonicalization %y = urem i32 42, %yp ; thwart complexity-based-canonicalization @@ -837,9 +837,9 @@ define i32 @uadd_sat_not_ugt(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not_ugt( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 ; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[Y]], [[X]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %a = add i32 %notx, %y @@ -853,9 +853,9 @@ define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) { ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], ; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1> ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]] -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[Y]], [[X]] -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]] -; CHECK-NEXT: ret <2 x i32> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]] +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %y = sdiv <2 x i32> %yp, ; thwart complexity-based-canonicalization %notx = xor <2 x i32> %x, <i32 -1, i32 -1> @@ -869,9 +869,9 @@ define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not_commute_select( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 ; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[X]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %a = add i32 %notx, %y @@ -885,9 +885,9 @@ define i32
@uadd_sat_not_commute_select_commute_add(i32 %x, i32 %yp) { ; CHECK-NEXT: [[Y:%.*]] = sdiv i32 42, [[YP:%.*]] ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 ; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[NOTX]] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[X]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %y = sdiv i32 42, %yp ; thwart complexity-based-canonicalization %notx = xor i32 %x, -1 @@ -903,9 +903,9 @@ define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp) ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i32> , [[YP:%.*]] ; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1> ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]] -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A]], <2 x i32> <i32 -1, i32 -1> -; CHECK-NEXT: ret <2 x i32> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]] +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %x = urem <2 x i32> , %xp ; thwart complexity-based-canonicalization %y = srem <2 x i32> , %yp ; thwart complexity-based-canonicalization @@ -920,9 +920,9 @@ define i32 @uadd_sat_not_commute_select_ugt_commute_add(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt_commute_add( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 ; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %a = add i32 %notx, %y -- 2.7.4