From c7f7f601f2c86203f8659e0a765549853f3c2a85 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Sun, 30 Apr 2023 09:32:54 -0500 Subject: [PATCH] [ValueTracking] Handle bitcasts between vec-int-ptr in `isKnownNonZero` We were missing these cases so something like: `(bitcast to i32 (or v216 x, <2, 1>))` would not be found to be non-zero. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D149409 --- llvm/lib/Analysis/ValueTracking.cpp | 35 ++++++++++++++++++++-- llvm/test/Analysis/ValueTracking/known-non-zero.ll | 12 ++------ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 1be618c..9fb044b 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2741,10 +2741,39 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, if (I->getType()->isPointerTy()) return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q); break; - case Instruction::BitCast: - if (I->getType()->isPointerTy()) + case Instruction::BitCast: { + // We need to be a bit careful here. We can only peek through the bitcast + // if the scalar size of elements in the operand is no larger than and a + // divisor of the scalar size they are casting to. Take three cases: + // + // 1) Unsafe: + // bitcast <2 x i16> %NonZero to <4 x i8> + // + // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a + // <4 x i8> requires that all 4 i8 elements be non-zero which isn't + // guaranteed (imagine just sign bit set in the 2 i16 elements). + // + // 2) Unsafe: + // bitcast <4 x i3> %NonZero to <3 x i4> + // + // Even though the scalar size of the src (`i3`) is smaller than the + // scalar size of the dst `i4`, because `i4` is not a multiple of `i3` + // it's possible for the `3 x i4` elements to be zero because there are + // some elements in the destination that don't contain any full src + // element. 
+ // + // 3) Safe: + // bitcast <4 x i8> %NonZero to <2 x i16> + // + // This is always safe as non-zero in the 4 i8 elements implies + // non-zero in the combination of any two adjacent ones. Since i16 is a + // multiple of i8, each i16 is guaranteed to have 2 full i8 elements. + // This all implies the 2 i16 elements are non-zero. + Type *FromTy = I->getOperand(0)->getType(); + if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) && + (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0) return isKnownNonZero(I->getOperand(0), Depth, Q); - break; + } break; case Instruction::IntToPtr: // Note that we have to take special care to avoid looking through // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index 9fad0f8..2ae4ba0 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -596,11 +596,7 @@ define i1 @fshl_non_zero_fail(i8 %x, i8 %y, i8 %z, i8 %w) { define i1 @bitcast_nonzero(<2 x i8> %xx, i16 %ind) { ; CHECK-LABEL: @bitcast_nonzero( -; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <2 x i8> [[XX:%.*]], -; CHECK-NEXT: [[X:%.*]] = bitcast <2 x i8> [[XA]] to i16 -; CHECK-NEXT: [[Z:%.*]] = or i16 [[X]], [[IND:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[Z]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %xa = add nuw nsw <2 x i8> %xx, %x = bitcast <2 x i8> %xa to i16 @@ -641,11 +637,7 @@ define <2 x i1> @bitcast_fail_nonzero_int_to_vec(i16 %xx, <2 x i8> %ind) { define <2 x i1> @bitcast_veci8_to_veci16(<4 x i8> %xx, <2 x i16> %ind) { ; CHECK-LABEL: @bitcast_veci8_to_veci16( -; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[XA]] to <2 x i16> -; CHECK-NEXT: [[Z:%.*]] = or <2 x i16> [[X]], [[IND:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[Z]], zeroinitializer -; 
CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %xa = add nuw nsw <4 x i8> %xx, %x = bitcast <4 x i8> %xa to <2 x i16> -- 2.7.4