From 09c575e728e2f24f76ea7f562e61fccaa225d72d Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 28 Sep 2021 14:28:53 -0400
Subject: [PATCH] [InstCombine] add/move tests for shl with binop; NFC

---
 llvm/test/Transforms/InstCombine/pr19420.ll | 202 ------------
 llvm/test/Transforms/InstCombine/shift.ll   |  28 --
 llvm/test/Transforms/InstCombine/shl-bo.ll  | 485 ++++++++++++++++++++++++++++
 3 files changed, 485 insertions(+), 230 deletions(-)
 delete mode 100644 llvm/test/Transforms/InstCombine/pr19420.ll
 create mode 100644 llvm/test/Transforms/InstCombine/shl-bo.ll

diff --git a/llvm/test/Transforms/InstCombine/pr19420.ll b/llvm/test/Transforms/InstCombine/pr19420.ll
deleted file mode 100644
index aeeed2f..0000000
--- a/llvm/test/Transforms/InstCombine/pr19420.ll
+++ /dev/null
@@ -1,202 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -instcombine < %s | FileCheck %s
-
-define <4 x i32> @test_FoldShiftByConstant_CreateSHL(<4 x i32> %in) {
-; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL(
-; CHECK-NEXT:    [[VSHL_N:%.*]] = mul <4 x i32> [[IN:%.*]], <i32 0, i32 -32, i32 0, i32 -32>
-; CHECK-NEXT:    ret <4 x i32> [[VSHL_N]]
-;
-  %mul.i = mul <4 x i32> %in, <i32 0, i32 -1, i32 0, i32 -1>
-  %vshl_n = shl <4 x i32> %mul.i, <i32 5, i32 5, i32 5, i32 5>
-  ret <4 x i32> %vshl_n
-}
-
-define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) {
-; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL2(
-; CHECK-NEXT:    [[VSHL_N:%.*]] = mul <8 x i16> [[IN:%.*]], <i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32>
-; CHECK-NEXT:    ret <8 x i16> [[VSHL_N]]
-;
-  %mul.i = mul <8 x i16> %in, <i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1>
-  %vshl_n = shl <8 x i16> %mul.i, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
-  ret <8 x i16> %vshl_n
-}
-
-define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) {
-; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd(
-; CHECK-NEXT:    [[TMP1:%.*]] = mul <16 x i8> [[IN0:%.*]], <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
-; CHECK-NEXT:    [[VSHL_N:%.*]] = and <16 x i8> [[TMP1]], <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32>
-; CHECK-NEXT:    ret <16 x i8> [[VSHL_N]]
-;
-  %vsra_n = ashr <16 x i8> %in0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
-  %tmp = add <16 x i8> %in0, %vsra_n
-  %vshl_n = shl <16 x i8> %tmp, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
-  ret <16 x i8> %vshl_n
-}
-
-define i32 @lshr_add_shl(i32 %x, i32 %y) {
-; CHECK-LABEL: @lshr_add_shl(
-; CHECK-NEXT:    [[B1:%.*]] = shl i32 [[Y:%.*]], 4
-; CHECK-NEXT:    [[A2:%.*]] = add i32 [[B1]], [[X:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = and i32 [[A2]], -16
-; CHECK-NEXT:    ret i32 [[C]]
-;
-  %a = lshr i32 %x, 4
-  %b = add i32 %a, %y
-  %c = shl i32 %b, 4
-  ret i32 %c
-}
-
-define <2 x i32> @lshr_add_shl_v2i32(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @lshr_add_shl_v2i32(
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 4, i32 4>
-; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[B1]], [[X:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = and <2 x i32> [[A2]], <i32 -16, i32 -16>
-; CHECK-NEXT:    ret <2 x i32> [[C]]
-;
-  %a = lshr <2 x i32> %x, <i32 4, i32 4>
-  %b = add <2 x i32> %a, %y
-  %c = shl <2 x i32> %b, <i32 4, i32 4>
-  ret <2 x i32> %c
-}
-
-define <2 x i32> @lshr_add_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @lshr_add_shl_v2i32_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[B:%.*]] = add <2 x i32> [[A]], [[Y:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = shl <2 x i32> [[B]],
-; CHECK-NEXT:    ret <2 x i32> [[C]]
-;
-  %a = lshr <2 x i32> %x,
-  %b = add <2 x i32> %a, %y
-  %c = shl <2 x i32> %b,
-  ret <2 x i32> %c
-}
-
-define <2 x i32> @lshr_add_shl_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @lshr_add_shl_v2i32_nonuniform(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[B:%.*]] = add <2 x i32> [[A]], [[Y:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = shl <2 x i32> [[B]],
-; CHECK-NEXT:    ret <2 x i32> [[C]]
-;
-  %a = lshr <2 x i32> %x,
-  %b = add <2 x i32> %a, %y
-  %c = shl <2 x i32> %b,
-  ret <2 x i32> %c
-}
-
-define i32 @lshr_add_and_shl(i32 %x, i32 %y) {
-; CHECK-LABEL: @lshr_add_and_shl(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 5
-; CHECK-NEXT:    [[X_MASK:%.*]] = and i32 [[X:%.*]], 4064
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[X_MASK]], [[TMP1]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %1 = lshr i32 %x, 5
-  %2 = and i32 %1, 127
-  %3 = add i32 %y, %2
-  %4 = shl i32 %3, 5
-  ret i32 %4
-}
-
-define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @lshr_add_and_shl_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 5, i32 5>
-; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 4064, i32 4064>
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[X_MASK]], [[TMP1]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
-;
-  %1 = lshr <2 x i32> %x, <i32 5, i32 5>
-  %2 = and <2 x i32> %1, <i32 127, i32 127>
-  %3 = add <2 x i32> %y, %2
-  %4 = shl <2 x i32> %3, <i32 5, i32 5>
-  ret <2 x i32> %4
-}
-
-define <2 x i32> @lshr_add_and_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @lshr_add_and_shl_v2i32_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]],
-; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i32> [[TMP3]],
-; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
-;
-  %1 = lshr <2 x i32> %x,
-  %2 = and <2 x i32> %1,
-  %3 = add <2 x i32> %y, %2
-  %4 = shl <2 x i32> %3,
-  ret <2 x i32> %4
-}
-
-define <2 x i32> @lshr_add_and_shl_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @lshr_add_and_shl_v2i32_nonuniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]],
-; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i32> [[TMP3]],
-; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
-;
-  %1 = lshr <2 x i32> %x,
-  %2 = and <2 x i32> %1,
-  %3 = add <2 x i32> %y, %2
-  %4 = shl <2 x i32> %3,
-  ret <2 x i32> %4
-}
-
-define i32 @shl_add_and_lshr(i32 %x, i32 %y) {
-; CHECK-LABEL: @shl_add_and_lshr(
-; CHECK-NEXT:    [[C1:%.*]] = shl i32 [[Y:%.*]], 4
-; CHECK-NEXT:    [[X_MASK:%.*]] = and i32 [[X:%.*]], 128
-; CHECK-NEXT:    [[D:%.*]] = add i32 [[X_MASK]], [[C1]]
-; CHECK-NEXT:    ret i32 [[D]]
-;
-  %a = lshr i32 %x, 4
-  %b = and i32 %a, 8
-  %c = add i32 %b, %y
-  %d = shl i32 %c, 4
-  ret i32 %d
-}
-
-define <2 x i32> @shl_add_and_lshr_v2i32(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @shl_add_and_lshr_v2i32(
-; CHECK-NEXT:    [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 4, i32 4>
-; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
-; CHECK-NEXT:    [[D:%.*]] = add <2 x i32> [[X_MASK]], [[C1]]
-; CHECK-NEXT:    ret <2 x i32> [[D]]
-;
-  %a = lshr <2 x i32> %x, <i32 4, i32 4>
-  %b = and <2 x i32> %a, <i32 8, i32 8>
-  %c = add <2 x i32> %b, %y
-  %d = shl <2 x i32> %c, <i32 4, i32 4>
-  ret <2 x i32> %d
-}
-
-define <2 x i32> @shl_add_and_lshr_v2i32_undef(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @shl_add_and_lshr_v2i32_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A]],
-; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = shl <2 x i32> [[C]],
-; CHECK-NEXT:    ret <2 x i32> [[D]]
-;
-  %a = lshr <2 x i32> %x,
-  %b = and <2 x i32> %a,
-  %c = add <2 x i32> %b, %y
-  %d = shl <2 x i32> %c,
-  ret <2 x i32> %d
-}
-
-define <2 x i32> @shl_add_and_lshr_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-LABEL: @shl_add_and_lshr_v2i32_nonuniform(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A]],
-; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = shl <2 x i32> [[C]],
-; CHECK-NEXT:    ret <2 x i32> [[D]]
-;
-  %a = lshr <2 x i32> %x,
-  %b = and <2 x i32> %a,
-  %c = add <2 x i32> %b, %y
-  %d = shl <2 x i32> %c,
-  ret <2 x i32> %d
-}
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index f644ed9..11d9be0 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -345,34 +345,6 @@ define i8 @test24(i8 %X) {
   ret i8 %Q
 }
 
-define i32 @test25(i32 %i.2, i32 %AA) {
-; CHECK-LABEL: @test25(
-; CHECK-NEXT:    [[I_3:%.*]] = and i32 [[I_2:%.*]], -131072
-; CHECK-NEXT:    [[X2:%.*]] = add i32 [[I_3]], [[AA:%.*]]
-; CHECK-NEXT:    [[I_6:%.*]] = and i32 [[X2]], -131072
-; CHECK-NEXT:    ret i32 [[I_6]]
-;
-  %x = lshr i32 %AA, 17
-  %i.3 = lshr i32 %i.2, 17
-  %i.5 = add i32 %i.3, %x
-  %i.6 = shl i32 %i.5, 17
-  ret i32 %i.6
-}
-
-define <2 x i32> @test25_vector(<2 x i32> %i.2, <2 x i32> %AA) {
-; CHECK-LABEL: @test25_vector(
-; CHECK-NEXT:    [[I_3:%.*]] = and <2 x i32> [[I_2:%.*]], <i32 -131072, i32 -131072>
-; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[I_3]], [[AA:%.*]]
-; CHECK-NEXT:    [[I_6:%.*]] = and <2 x i32> [[X2]], <i32 -131072, i32 -131072>
-; CHECK-NEXT:    ret <2 x i32> [[I_6]]
-;
-  %x = lshr <2 x i32> %AA, <i32 17, i32 17>
-  %i.3 = lshr <2 x i32> %i.2, <i32 17, i32 17>
-  %i.5 = add <2 x i32> %i.3, %x
-  %i.6 = shl <2 x i32> %i.5, <i32 17, i32 17>
-  ret <2 x i32> %i.6
-}
-
 ;; handle casts between shifts.
 define i32 @test26(i32 %A) {
 ; CHECK-LABEL: @test26(
diff --git a/llvm/test/Transforms/InstCombine/shl-bo.ll b/llvm/test/Transforms/InstCombine/shl-bo.ll
new file mode 100644
index 0000000..f581ca2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shl-bo.ll
@@ -0,0 +1,485 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i8 @lshr_add(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_add(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 5
+; CHECK-NEXT:    [[R2:%.*]] = add i8 [[B1]], [[Y:%.*]]
+; CHECK-NEXT:    [[L:%.*]] = and i8 [[R2]], -32
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 5
+  %b = add i8 %r, %x
+  %l = shl i8 %b, 5
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_add_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_add_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 5>
+; CHECK-NEXT:    [[R2:%.*]] = add <2 x i8> [[B1]], [[Y:%.*]]
+; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[R2]], <i8 -32, i8 -32>
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 5, i8 5>
+  %b = add <2 x i8> %x, %r
+  %l = shl <2 x i8> %b, <i8 5, i8 5>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_sub(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_sub(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[R:%.*]] = lshr i8 [[Y:%.*]], 3
+; CHECK-NEXT:    [[B:%.*]] = sub nsw i8 [[X]], [[R]]
+; CHECK-NEXT:    [[L:%.*]] = shl i8 [[B]], 3
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 3
+  %b = sub i8 %x, %r
+  %l = shl i8 %b, 3
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_sub_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1_NEG:%.*]] = mul <2 x i8> [[X]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[R2:%.*]] = add <2 x i8> [[B1_NEG]], [[Y:%.*]]
+; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[R2]], <i8 -8, i8 -8>
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 3, i8 3>
+  %b = sub <2 x i8> %r, %x
+  %l = shl <2 x i8> %b, <i8 3, i8 3>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_and(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_and(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 6
+; CHECK-NEXT:    [[R2:%.*]] = and i8 [[B1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i8 [[R2]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 6
+  %b = and i8 %r, %x
+  %l = shl i8 %b, 6
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_and_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_and_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 6, i8 6>
+; CHECK-NEXT:    [[R2:%.*]] = and <2 x i8> [[B1]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i8> [[R2]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 6, i8 6>
+  %b = and <2 x i8> %x, %r
+  %l = shl <2 x i8> %b, <i8 6, i8 6>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_or(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_or(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 4
+; CHECK-NEXT:    [[Y_MASKED:%.*]] = and i8 [[Y:%.*]], -16
+; CHECK-NEXT:    [[L:%.*]] = or i8 [[B1]], [[Y_MASKED]]
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 4
+  %b = or i8 %x, %r
+  %l = shl i8 %b, 4
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_or_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_or_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT:    [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[L:%.*]] = or <2 x i8> [[B1]], [[Y_MASKED]]
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 4, i8 4>
+  %b = or <2 x i8> %r, %x
+  %l = shl <2 x i8> %b, <i8 4, i8 4>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_xor(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_xor(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 3
+; CHECK-NEXT:    [[Y_MASKED:%.*]] = and i8 [[Y:%.*]], -8
+; CHECK-NEXT:    [[L:%.*]] = xor i8 [[B1]], [[Y_MASKED]]
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 3
+  %b = xor i8 %r, %x
+  %l = shl i8 %b, 3
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_xor_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_xor_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT:    [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[L:%.*]] = xor <2 x i8> [[B1]], [[Y_MASKED]]
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 3, i8 3>
+  %b = xor <2 x i8> %x, %r
+  %l = shl <2 x i8> %b, <i8 3, i8 3>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_and_add(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_and_add(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 3
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and i8 [[Y:%.*]], 96
+; CHECK-NEXT:    [[L:%.*]] = add i8 [[B1]], [[Y_MASK]]
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 3
+  %m = and i8 %r, 12
+  %b = add i8 %x, %m
+  %l = shl i8 %b, 3
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_and_add_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_and_add_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 96, i8 96>
+; CHECK-NEXT:    [[L:%.*]] = add <2 x i8> [[Y_MASK]], [[B1]]
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 3, i8 3>
+  %m = and <2 x i8> %r, <i8 12, i8 12>
+  %b = add <2 x i8> %m, %x
+  %l = shl <2 x i8> %b, <i8 3, i8 3>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_and_sub(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_and_sub(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[R:%.*]] = lshr i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[M:%.*]] = and i8 [[R]], 13
+; CHECK-NEXT:    [[B:%.*]] = sub nsw i8 [[X]], [[M]]
+; CHECK-NEXT:    [[L:%.*]] = shl i8 [[B]], 2
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 2
+  %m = and i8 %r, 13
+  %b = sub i8 %x, %m
+  %l = shl i8 %b, 2
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_and_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_and_sub_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1_NEG:%.*]] = mul <2 x i8> [[X]], <i8 -4, i8 -4>
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[L:%.*]] = add <2 x i8> [[B1_NEG]], [[Y_MASK]]
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 2, i8 2>
+  %m = and <2 x i8> %r, <i8 13, i8 13>
+  %b = sub <2 x i8> %m, %x
+  %l = shl <2 x i8> %b, <i8 2, i8 2>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_and_and(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_and_and(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 2
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and i8 [[Y:%.*]], 52
+; CHECK-NEXT:    [[L:%.*]] = and i8 [[Y_MASK]], [[B1]]
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 2
+  %m = and i8 %r, 13
+  %b = and i8 %m, %x
+  %l = shl i8 %b, 2
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_and_and_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_and_and_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 2, i8 2>
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[B1]], [[Y_MASK]]
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 2, i8 2>
+  %m = and <2 x i8> %r, <i8 13, i8 13>
+  %b = and <2 x i8> %x, %m
+  %l = shl <2 x i8> %b, <i8 2, i8 2>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_and_or(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_and_or(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 2
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and i8 [[Y:%.*]], 52
+; CHECK-NEXT:    [[L:%.*]] = or i8 [[B1]], [[Y_MASK]]
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 2
+  %m = and i8 %r, 13
+  %b = or i8 %x, %m
+  %l = shl i8 %b, 2
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_and_or_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_and_or_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 2, i8 2>
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[L:%.*]] = or <2 x i8> [[Y_MASK]], [[B1]]
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 2, i8 2>
+  %m = and <2 x i8> %r, <i8 13, i8 13>
+  %b = or <2 x i8> %m, %x
+  %l = shl <2 x i8> %b, <i8 2, i8 2>
+  ret <2 x i8> %l
+}
+
+define i8 @lshr_and_xor(i8 %a, i8 %y) {
+; CHECK-LABEL: @lshr_and_xor(
+; CHECK-NEXT:    [[X:%.*]] = srem i8 [[A:%.*]], 42
+; CHECK-NEXT:    [[B1:%.*]] = shl i8 [[X]], 2
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and i8 [[Y:%.*]], 52
+; CHECK-NEXT:    [[L:%.*]] = xor i8 [[Y_MASK]], [[B1]]
+; CHECK-NEXT:    ret i8 [[L]]
+;
+  %x = srem i8 %a, 42 ; thwart complexity-based canonicalization
+  %r = lshr i8 %y, 2
+  %m = and i8 %r, 13
+  %b = xor i8 %m, %x
+  %l = shl i8 %b, 2
+  ret i8 %l
+}
+
+define <2 x i8> @lshr_and_xor_commute_splat(<2 x i8> %a, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_and_xor_commute_splat(
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 2, i8 2>
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[L:%.*]] = xor <2 x i8> [[B1]], [[Y_MASK]]
+; CHECK-NEXT:    ret <2 x i8> [[L]]
+;
+  %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
+  %r = lshr <2 x i8> %y, <i8 2, i8 2>
+  %m = and <2 x i8> %r, <i8 13, i8 13>
+  %b = xor <2 x i8> %x, %m
+  %l = shl <2 x i8> %b, <i8 2, i8 2>
+  ret <2 x i8> %l
+}
+
+define <2 x i8> @lshr_add_shl_v2i8_undef(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_add_shl_v2i8_undef(
+; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[Y:%.*]],
+; CHECK-NEXT:    [[B:%.*]] = add <2 x i8> [[A]], [[X:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = shl <2 x i8> [[B]],
+; CHECK-NEXT:    ret <2 x i8> [[C]]
+;
+  %a = lshr <2 x i8> %y,
+  %b = add <2 x i8> %a, %x
+  %c = shl <2 x i8> %b,
+  ret <2 x i8> %c
+}
+
+define <2 x i8> @lshr_add_shl_v2i8_nonuniform(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @lshr_add_shl_v2i8_nonuniform(
+; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[Y:%.*]],
+; CHECK-NEXT:    [[B:%.*]] = add <2 x i8> [[A]], [[X:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = shl <2 x i8> [[B]],
+; CHECK-NEXT:    ret <2 x i8> [[C]]
+;
+  %a = lshr <2 x i8> %y,
+  %b = add <2 x i8> %a, %x
+  %c = shl <2 x i8> %b,
+  ret <2 x i8> %c
+}
+
+define i32 @lshr_add_and_shl(i32 %x, i32 %y) {
+; CHECK-LABEL: @lshr_add_and_shl(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 5
+; CHECK-NEXT:    [[X_MASK:%.*]] = and i32 [[X:%.*]], 4064
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[X_MASK]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %1 = lshr i32 %x, 5
+  %2 = and i32 %1, 127
+  %3 = add i32 %y, %2
+  %4 = shl i32 %3, 5
+  ret i32 %4
+}
+
+define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr_add_and_shl_v2i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 4064, i32 4064>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[X_MASK]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %1 = lshr <2 x i32> %x, <i32 5, i32 5>
+  %2 = and <2 x i32> %1, <i32 127, i32 127>
+  %3 = add <2 x i32> %y, %2
+  %4 = shl <2 x i32> %3, <i32 5, i32 5>
+  ret <2 x i32> %4
+}
+
+define <2 x i32> @lshr_add_and_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr_add_and_shl_v2i32_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]],
+; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i32> [[TMP3]],
+; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
+;
+  %1 = lshr <2 x i32> %x,
+  %2 = and <2 x i32> %1,
+  %3 = add <2 x i32> %y, %2
+  %4 = shl <2 x i32> %3,
+  ret <2 x i32> %4
+}
+
+define <2 x i32> @lshr_add_and_shl_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @lshr_add_and_shl_v2i32_nonuniform(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]],
+; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i32> [[TMP3]],
+; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
+;
+  %1 = lshr <2 x i32> %x,
+  %2 = and <2 x i32> %1,
+  %3 = add <2 x i32> %y, %2
+  %4 = shl <2 x i32> %3,
+  ret <2 x i32> %4
+}
+
+define i32 @shl_add_and_lshr(i32 %x, i32 %y) {
+; CHECK-LABEL: @shl_add_and_lshr(
+; CHECK-NEXT:    [[C1:%.*]] = shl i32 [[Y:%.*]], 4
+; CHECK-NEXT:    [[X_MASK:%.*]] = and i32 [[X:%.*]], 128
+; CHECK-NEXT:    [[D:%.*]] = add i32 [[X_MASK]], [[C1]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %a = lshr i32 %x, 4
+  %b = and i32 %a, 8
+  %c = add i32 %b, %y
+  %d = shl i32 %c, 4
+  ret i32 %d
+}
+
+define <2 x i32> @shl_add_and_lshr_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @shl_add_and_lshr_v2i32(
+; CHECK-NEXT:    [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    [[D:%.*]] = add <2 x i32> [[X_MASK]], [[C1]]
+; CHECK-NEXT:    ret <2 x i32> [[D]]
+;
+  %a = lshr <2 x i32> %x, <i32 4, i32 4>
+  %b = and <2 x i32> %a, <i32 8, i32 8>
+  %c = add <2 x i32> %b, %y
+  %d = shl <2 x i32> %c, <i32 4, i32 4>
+  ret <2 x i32> %d
+}
+
+define <2 x i32> @shl_add_and_lshr_v2i32_undef(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @shl_add_and_lshr_v2i32_undef(
+; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A]],
+; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = shl <2 x i32> [[C]],
+; CHECK-NEXT:    ret <2 x i32> [[D]]
+;
+  %a = lshr <2 x i32> %x,
+  %b = and <2 x i32> %a,
+  %c = add <2 x i32> %b, %y
+  %d = shl <2 x i32> %c,
+  ret <2 x i32> %d
+}
+
+define <2 x i32> @shl_add_and_lshr_v2i32_nonuniform(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @shl_add_and_lshr_v2i32_nonuniform(
+; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> [[X:%.*]],
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A]],
+; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = shl <2 x i32> [[C]],
+; CHECK-NEXT:    ret <2 x i32> [[D]]
+;
+  %a = lshr <2 x i32> %x,
+  %b = and <2 x i32> %a,
+  %c = add <2 x i32> %b, %y
+  %d = shl <2 x i32> %c,
+  ret <2 x i32> %d
+}
+
+define <4 x i32> @test_FoldShiftByConstant_CreateSHL(<4 x i32> %in) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL(
+; CHECK-NEXT:    [[VSHL_N:%.*]] = mul <4 x i32> [[IN:%.*]], <i32 0, i32 -32, i32 0, i32 -32>
+; CHECK-NEXT:    ret <4 x i32> [[VSHL_N]]
+;
+  %mul.i = mul <4 x i32> %in, <i32 0, i32 -1, i32 0, i32 -1>
+  %vshl_n = shl <4 x i32> %mul.i, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %vshl_n
+}
+
+define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL2(
+; CHECK-NEXT:    [[VSHL_N:%.*]] = mul <8 x i16> [[IN:%.*]], <i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32>
+; CHECK-NEXT:    ret <8 x i16> [[VSHL_N]]
+;
+  %mul.i = mul <8 x i16> %in, <i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1>
+  %vshl_n = shl <8 x i16> %mul.i, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  ret <8 x i16> %vshl_n
+}
+
+define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <16 x i8> [[IN0:%.*]], <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
+; CHECK-NEXT:    [[VSHL_N:%.*]] = and <16 x i8> [[TMP1]], <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32>
+; CHECK-NEXT:    ret <16 x i8> [[VSHL_N]]
+;
+  %vsra_n = ashr <16 x i8> %in0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  %tmp = add <16 x i8> %in0, %vsra_n
+  %vshl_n = shl <16 x i8> %tmp, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  ret <16 x i8> %vshl_n
+}
--
2.7.4
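
The canonicalization these tests pin down reassociates a shift over a binop when the inner operand is a right-shift by the same amount: shl (add (lshr Y, C), X), C becomes add (shl X, C), (and Y, -1 << C). A minimal before/after sketch (hypothetical function name @example; it mirrors the lshr_add test above, minus the srem that thwarts complexity-based canonicalization, so the folded operand order may differ in practice):

    define i8 @example(i8 %x, i8 %y) {
      %r = lshr i8 %y, 5   ; discards the low 5 bits of %y
      %b = add i8 %r, %x
      %l = shl i8 %b, 5    ; the outer shl makes the lshr redundant
      ret i8 %l
    }

    ; Expected after -instcombine, per the lshr_add CHECK lines:
    ;   %b1 = shl i8 %x, 5
    ;   %r2 = add i8 %b1, %y   ; %b1 has zero low bits, so %y's low bits cannot carry upward
    ;   %l  = and i8 %r2, -32  ; clear the bits the original lshr/shl pair zeroed
    ;   ret i8 %l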