From 3fb13b8484dcbec085da047879bf89ccb1b65b12 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 28 Jul 2020 15:13:19 -0400 Subject: [PATCH] [InstSimplify] allow undefs in icmp with vector constant folds This is the main icmp simplification shortcoming seen in D84655. Alive2 agrees that the basic examples are correct at least: define <2 x i1> @src(<2 x i8> %x) { %0: %r = icmp sle <2 x i8> { undef, 128 }, %x ret <2 x i1> %r } => define <2 x i1> @tgt(<2 x i8> %x) { %0: ret <2 x i1> { 1, 1 } } Transformation seems to be correct! define <2 x i1> @src(<2 x i32> %X) { %0: %A = or <2 x i32> %X, { 63, 63 } %B = icmp ult <2 x i32> %A, { undef, 50 } ret <2 x i1> %B } => define <2 x i1> @tgt(<2 x i32> %X) { %0: ret <2 x i1> { 0, 0 } } Transformation seems to be correct! https://alive2.llvm.org/ce/z/omt2ee https://alive2.llvm.org/ce/z/GW4nP_ Differential Revision: https://reviews.llvm.org/D84762 --- llvm/lib/Analysis/InstructionSimplify.cpp | 2 +- llvm/test/Transforms/InstSimplify/icmp-constant.ll | 22 ++++++---------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index c920fb3..01b2eb1 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2732,7 +2732,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, } const APInt *C; - if (!match(RHS, m_APInt(C))) + if (!match(RHS, m_APIntAllowUndef(C))) return nullptr; // Rule out tautological comparisons (eg., ult 0 or uge 0). diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll index 8f51ba3..94126bf 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll @@ -21,8 +21,7 @@ define <2 x i1> @tautological_ule_vec(<2 x i8> %x) { define <2 x i1> @tautological_ule_vec_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @tautological_ule_vec_partial_undef( -; CHECK-NEXT: [[CMP:%.*]] = icmp ule <2 x i8> [[X:%.*]], -; CHECK-NEXT: ret <2 x i1> [[CMP]] +; CHECK-NEXT: ret <2 x i1> ; %cmp = icmp ule <2 x i8> %x, ret <2 x i1> %cmp @@ -46,8 +45,7 @@ define <2 x i1> @tautological_ugt_vec(<2 x i8> %x) { define <2 x i1> @tautological_ugt_vec_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @tautological_ugt_vec_partial_undef( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], -; CHECK-NEXT: ret <2 x i1> [[CMP]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %cmp = icmp ugt <2 x i8> %x, ret <2 x i1> %cmp @@ -74,9 +72,7 @@ define <2 x i1> @urem3_vec(<2 x i32> %X) { define <2 x i1> @urem3_vec_partial_undef(<2 x i32> %X) { ; CHECK-LABEL: @urem3_vec_partial_undef( -; CHECK-NEXT: [[A:%.*]] = urem <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i32> [[A]], -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: ret <2 x i1> ; %A = urem <2 x i32> %X, %B = icmp ult <2 x i32> %A, @@ -104,9 +100,7 @@ define <2 x i1> @srem1_vec(<2 x i32> %X) { define <2 x i1> @srem1_vec_partial_undef(<2 x i32> %X) { ; CHECK-LABEL: @srem1_vec_partial_undef( -; CHECK-NEXT: [[A:%.*]] = srem <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = icmp sgt <2 x i32> [[A]], -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %A = srem <2 x i32> %X, %B = icmp sgt <2 x i32> %A, @@ -211,9 +205,7 @@ define <2 x i1> @shl5_vec(<2 x i32> %X) { define <2 x i1> @shl5_vec_partial_undef(<2 x i32> %X) { ; CHECK-LABEL: @shl5_vec_partial_undef( -; CHECK-NEXT: [[SUB:%.*]] = shl nuw <2 x i32> , [[X:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[SUB]], -; CHECK-NEXT: ret <2 x i1> [[CMP]] +; CHECK-NEXT: ret <2 x i1> ; %sub = shl nuw <2 x i32> , %X %cmp = icmp ugt <2 x i32> %sub, @@ -431,9 +423,7 @@ define <2 x i1> @or1_vec(<2 x i32> %X) { define <2 x i1> @or1_vec_partial_undef(<2 x i32> %X) { ; CHECK-LABEL: @or1_vec_partial_undef( -; CHECK-NEXT: [[A:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i32> [[A]], -; CHECK-NEXT: ret <2 x i1> [[B]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %A = or <2 x i32> %X, %B = icmp ult <2 x i32> %A, -- 2.7.4