From e8f666f48d87e3d61ae3f68ed96d280805707a22 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Sat, 17 Aug 2019 21:35:33 +0000
Subject: [PATCH] [NFC][InstCombine] Some tests for 'shift amount reassoc in bit test - trunc-of-lshr' (PR42399)

Finally, the fold I was looking forward to :)

The legality check is muddy; I doubt I've grokked the full generalization,
but it handles all the cases I care about and can come up with:
https://rise4fun.com/Alive/26j

https://bugs.llvm.org/show_bug.cgi?id=42399

llvm-svn: 369197
---
 ...eassociation-in-bittest-with-truncation-lshr.ll | 546 +++++++++++++++++++++
 1 file changed, 546 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll

diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
new file mode 100644
index 0000000..56d4ee7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
@@ -0,0 +1,546 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -instcombine -S | FileCheck %s
+
+; Given the pattern:
+; icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+; we should move the shifts to the same hand of the 'and', i.e. rewrite it as
+; icmp eq/ne (and (((x shift Q) shift K), y)), 0
+; We are only interested in opposite logical shifts here.
+; We can still handle the case where there is a truncation between a shift
+; and an 'and', though the legality check isn't obvious.
+; (A hand-written sketch of the truncation-free rewrite is appended after the
+; patch, for illustration only.)
+
+;-------------------------------------------------------------------------------
+; Basic scalar tests
+;-------------------------------------------------------------------------------
+
+; This fold can't be performed for fully-variable %x and %y.
+define i1 @n0(i32 %x, i64 %y, i32 %len) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
+; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16
+; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
+; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
+; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
+; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
+; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t0 = sub i32 32, %len
+ %t1 = shl i32 %x, %t0
+ %t2 = add i32 %len, -16
+ %t2_wide = zext i32 %t2 to i64
+ %t3 = lshr i64 %y, %t2_wide
+ %t3_trunc = trunc i64 %t3 to i32
+ %t4 = and i32 %t1, %t3_trunc
+ %t5 = icmp ne i32 %t4, 0
+ ret i1 %t5
+}
+
+; However, we can fold if %x/%y are constants that pass the extra legality check.
+
+; New shift amount would be 16; %x has 16 leading zeros - can fold.
+define i1 @t1(i64 %y, i32 %len) { +; CHECK-LABEL: @t1( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 65535, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 65535, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} +; Note that we indeed look at leading zeros! +define i1 @t1_single_bit(i64 %y, i32 %len) { +; CHECK-LABEL: @t1_single_bit( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 32768, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 32768, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} +; New shift amount would be 16, %x has 15 leading zeros - can not fold. +define i1 @n2(i64 %y, i32 %len) { +; CHECK-LABEL: @n2( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 131071, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 131071, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} + +; New shift amount would be 16, %y has 47 leading zeros - can fold. +define i1 @t3(i32 %x, i32 %len) { +; CHECK-LABEL: @t3( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 131071, [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 131071, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} +; Note that we indeed look at leading zeros! 
+define i1 @t3_singlebit(i32 %x, i32 %len) { +; CHECK-LABEL: @t3_singlebit( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 65536, [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 65536, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} +; New shift amount would be 16, %y has 48 leading zeros - can not fold. +define i1 @n4(i32 %x, i32 %len) { +; CHECK-LABEL: @n4( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 262143, [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 262143, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} + +; While we could still deal with arbitrary values if KnownBits can answer +; the question, it isn't obvious it's worth it, so let's not for now. + +;------------------------------------------------------------------------------- +; Vector tests +;------------------------------------------------------------------------------- + +; New shift amount would be 16, minimal count of leading zeros in %x is 16. Ok. +define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) { +; CHECK-LABEL: @t5_vec( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> , %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> %y, %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} +; New shift amount would be 16, minimal count of leading zeros in %x is 15, not ok to fold. 
+define <2 x i1> @n6_vec(<2 x i64> %y, <2 x i32> %len) { +; CHECK-LABEL: @n6_vec( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> , %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> %y, %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} + +; New shift amount would be 16, minimal count of leading zeros in %x is 47. Ok. +define <2 x i1> @t7_vec(<2 x i32> %x, <2 x i32> %len) { +; CHECK-LABEL: @t7_vec( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> , [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> %x, %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> , %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} +; New shift amount would be 16, minimal count of leading zeros in %x is 48, not ok to fold. +define <2 x i1> @n8_vec(<2 x i32> %x, <2 x i32> %len) { +; CHECK-LABEL: @n8_vec( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> , [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> %x, %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> , %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} + +;------------------------------------------------------------------------------- + +; Ok if the final shift amount is exactly one less than widest bit width. 
+define i1 @t9_highest_bit(i32 %x, i64 %y, i32 %len) { +; CHECK-LABEL: @t9_highest_bit( +; CHECK-NEXT: [[T0:%.*]] = sub i32 64, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -1 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 64, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -1 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} +; Not highest bit. +define i1 @t10_almost_highest_bit(i32 %x, i64 %y, i32 %len) { +; CHECK-LABEL: @t10_almost_highest_bit( +; CHECK-NEXT: [[T0:%.*]] = sub i32 64, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -2 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 64, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -2 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} + +; Ok if the final shift amount is zero. +define i1 @t11_no_shift(i32 %x, i64 %y, i32 %len) { +; CHECK-LABEL: @t11_no_shift( +; CHECK-NEXT: [[T0:%.*]] = sub i32 64, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -64 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 64, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -64 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} +; Not zero-shift. +define i1 @t10_shift_by_one(i32 %x, i64 %y, i32 %len) { +; CHECK-LABEL: @t10_shift_by_one( +; CHECK-NEXT: [[T0:%.*]] = sub i32 64, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -63 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 64, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -63 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} + +; A mix of those conditions is ok. 
+define <2 x i1> @t11_zero_and_almost_bitwidth(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { +; CHECK-LABEL: @t11_zero_and_almost_bitwidth( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> %x, %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> %y, %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} +define <2 x i1> @n12_bad(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { +; CHECK-LABEL: @n12_bad( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> %x, %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> %y, %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} + +;------------------------------------------------------------------------------; + +; Ok if one of the values being shifted is 1 +define i1 @t13_x_is_one(i64 %y, i32 %len) { +; CHECK-LABEL: @t13_x_is_one( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 1, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 %y, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 + %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} +define i1 @t14_x_is_one(i32 %x, i32 %len) { +; CHECK-LABEL: @t14_x_is_one( +; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64 +; CHECK-NEXT: [[T3:%.*]] = lshr i64 1, [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 +; CHECK-NEXT: ret i1 [[T5]] +; + %t0 = sub i32 32, %len + %t1 = shl i32 %x, %t0 + %t2 = add i32 %len, -16 + %t2_wide = zext i32 %t2 to i64 + %t3 = lshr i64 1, %t2_wide + %t3_trunc = trunc i64 %t3 to i32 
+ %t4 = and i32 %t1, %t3_trunc + %t5 = icmp ne i32 %t4, 0 + ret i1 %t5 +} + +define <2 x i1> @t15_vec_x_is_one_or_zero(<2 x i64> %y, <2 x i32> %len) { +; CHECK-LABEL: @t15_vec_x_is_one_or_zero( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> , %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> %y, %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} +define <2 x i1> @t16_vec_y_is_one_or_zero(<2 x i32> %x, <2 x i32> %len) { +; CHECK-LABEL: @t16_vec_y_is_one_or_zero( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64> +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> , [[T2_WIDE]] +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] +; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[T5]] +; + %t0 = sub <2 x i32> , %len + %t1 = shl <2 x i32> %x, %t0 + %t2 = add <2 x i32> %len, + %t2_wide = zext <2 x i32> %t2 to <2 x i64> + %t3 = lshr <2 x i64> , %t2_wide + %t3_trunc = trunc <2 x i64> %t3 to <2 x i32> + %t4 = and <2 x i32> %t1, %t3_trunc + %t5 = icmp ne <2 x i32> %t4, + ret <2 x i1> %t5 +} + +;------------------------------------------------------------------------------; + +; All other tests - extra uses, etc are already covered in +; shift-amount-reassociation-in-bittest-with-truncation-shl.ll and +; shift-amount-reassociation-in-bittest.ll + +; And that's the main motivational pattern: +define i1 @rawspeed_signbit(i64 %storage, i32 %nbits) { +; CHECK-LABEL: @rawspeed_signbit( +; CHECK-NEXT: [[SKIPNBITS:%.*]] = sub nsw i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[SKIPNBITSWIDE:%.*]] = zext i32 [[SKIPNBITS]] to i64 +; CHECK-NEXT: [[DATAWIDE:%.*]] = lshr i64 [[STORAGE:%.*]], [[SKIPNBITSWIDE]] +; CHECK-NEXT: [[DATA:%.*]] = trunc i64 [[DATAWIDE]] to i32 +; CHECK-NEXT: [[NBITSMINUSONE:%.*]] = add nsw i32 [[NBITS]], -1 +; CHECK-NEXT: [[BITMASK:%.*]] = shl i32 1, [[NBITSMINUSONE]] +; CHECK-NEXT: [[BITMASKED:%.*]] = and i32 [[BITMASK]], [[DATA]] +; CHECK-NEXT: [[ISBITUNSET:%.*]] = icmp eq i32 [[BITMASKED]], 0 +; CHECK-NEXT: ret i1 [[ISBITUNSET]] +; + %skipnbits = sub nsw i32 64, %nbits + %skipnbitswide = zext i32 %skipnbits to i64 + %datawide = lshr i64 %storage, %skipnbitswide + %data = trunc i64 %datawide to i32 + %nbitsminusone = add nsw i32 %nbits, -1 + %bitmask = shl i32 1, %nbitsminusone + %bitmasked = and i32 %bitmask, %data + %isbitunset = icmp eq i32 %bitmasked, 0 + ret i1 %isbitunset +} -- 2.7.4
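
The file header above describes the rewrite abstractly; purely for illustration (this is not part of the committed test file, and the function names below are made up), here is a hand-written before/after sketch of the truncation-free form of the reassociation, assuming both shift amounts stay within the 0..31 range. The point is that the two variable shift amounts, (32 - %len) and (%len - 16), always sum to the constant 16, so once both shifts sit on the %x hand of the 'and' they collapse into a single constant shift.

; Before: one shift on each hand of the 'and'.
define i1 @reassoc_base_before(i32 %x, i32 %y, i32 %len) {
  %q = sub i32 32, %len        ; Q = 32 - len
  %k = add i32 %len, -16       ; K = len - 16
  %xs = shl i32 %x, %q         ; x shift Q
  %ys = lshr i32 %y, %k        ; y oppositeshift K
  %m = and i32 %xs, %ys
  %r = icmp ne i32 %m, 0
  ret i1 %r
}

; After (hoped-for form, not claimed to be the current pass output):
; (x << Q) & (y >> K) != 0 is equivalent to (x << (Q + K)) & y != 0,
; and here Q + K == (32 - len) + (len - 16) == 16.
define i1 @reassoc_base_after(i32 %x, i32 %y, i32 %len) {
  %xs = shl i32 %x, 16
  %m = and i32 %xs, %y
  %r = icmp ne i32 %m, 0
  ret i1 %r
}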
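
Similarly, the motivating @rawspeed_signbit pattern tests bit (%nbits - 1) of the truncated %data, which is bit (64 - %nbits) + (%nbits - 1) = 63 of %storage, i.e. its sign bit (assuming 1 <= %nbits <= 32 so that the i32 'shl' is not poison). The end state one would hope the combined folds eventually reach is therefore a plain sign test of the wide value; a hand-written sketch of that goal (the function name is made up, and this is not the current -instcombine output):

define i1 @rawspeed_signbit_goal(i64 %storage, i32 %nbits) {
  ; Bit 63 of %storage is clear exactly when %storage is non-negative.
  %isbitunset = icmp sgt i64 %storage, -1
  ret i1 %isbitunset
}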