From 6c52f82d77a1b8d9d9f8b585c73f94e58191b5a9 Mon Sep 17 00:00:00 2001
From: Filipp Zhinkin
Date: Mon, 8 Aug 2022 20:54:49 +0300
Subject: [PATCH] [X86][ARM] Add tests for bitwise logic trees of shifts; NFC

Baseline tests for D131189.
---
Notes:
    Each new i32 / <4 x i32> function combines four operands with a tree
    of `or`, `xor` or `and` in which %a and %c are shifted first (`shl`,
    `lshr` or `ashr`). Cases where both shifts use the same opcode and
    the same amount (constant 16, or the variable %s) sit next to cases
    that differ in amount (15 vs 16; 16 vs 17 for vectors) or in opcode
    (shl vs lshr). The ARM file additionally has an i64 chain that ors
    %a with %d, %b and %c shifted left by 48, 16 and 32.

    The splat shift amounts of the vector tests are written out in full;
    they correspond to the `#16` / `#17` and `$16` / `$17` immediates in
    the CHECK lines.

    NOTE(review): the first context line of the X86 hunk
    (`%conv1 = ashr <4 x i32> %sext,`) has lost its second operand in
    this copy of the patch - restore it from the target file before
    applying.

 llvm/test/CodeGen/ARM/shift-combine.ll | 407 +++++++++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/shift-combine.ll | 205 +++++++++++++++++
 2 files changed, 612 insertions(+)

diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll
index 549d709..d0bd44e 100644
--- a/llvm/test/CodeGen/ARM/shift-combine.ll
+++ b/llvm/test/CodeGen/ARM/shift-combine.ll
@@ -893,3 +893,410 @@ entry:
   %cmp.i = icmp ugt i32 %bf.cast.i, %AttrArgNo
   ret i1 %cmp.i
 }
+
+define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-ARM-LABEL: or_tree_with_shifts_i64:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    .save {r11, lr}
+; CHECK-ARM-NEXT:    push {r11, lr}
+; CHECK-ARM-NEXT:    ldr lr, [sp, #16]
+; CHECK-ARM-NEXT:    lsl r3, r3, #16
+; CHECK-ARM-NEXT:    ldr r12, [sp, #8]
+; CHECK-ARM-NEXT:    orr r3, r3, r2, lsr #16
+; CHECK-ARM-NEXT:    orr r0, r0, r2, lsl #16
+; CHECK-ARM-NEXT:    orr r1, r1, lr, lsl #16
+; CHECK-ARM-NEXT:    orr r1, r1, r3
+; CHECK-ARM-NEXT:    orr r1, r1, r12
+; CHECK-ARM-NEXT:    pop {r11, pc}
+;
+; CHECK-BE-LABEL: or_tree_with_shifts_i64:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    .save {r11, lr}
+; CHECK-BE-NEXT:    push {r11, lr}
+; CHECK-BE-NEXT:    ldr lr, [sp, #20]
+; CHECK-BE-NEXT:    lsl r2, r2, #16
+; CHECK-BE-NEXT:    ldr r12, [sp, #12]
+; CHECK-BE-NEXT:    orr r2, r2, r3, lsr #16
+; CHECK-BE-NEXT:    orr r1, r1, r3, lsl #16
+; CHECK-BE-NEXT:    orr r0, r0, lr, lsl #16
+; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    orr r0, r0, r12
+; CHECK-BE-NEXT:    pop {r11, pc}
+;
+; CHECK-ALIGN-LABEL: or_tree_with_shifts_i64:
+; CHECK-ALIGN:       @ %bb.0:
+; CHECK-ALIGN-NEXT:    ldr.w r12, [sp, #8]
+; CHECK-ALIGN-NEXT:    lsls r3, r3, #16
+; CHECK-ALIGN-NEXT:    orr.w r3, r3, r2, lsr #16
+; CHECK-ALIGN-NEXT:    orr.w r0, r0, r2, lsl #16
+; CHECK-ALIGN-NEXT:    orr.w r1, r1, r12, lsl #16
+; CHECK-ALIGN-NEXT:    orrs r1, r3
+; CHECK-ALIGN-NEXT:    ldr r3, [sp]
+; CHECK-ALIGN-NEXT:    orrs r1, r3
+; CHECK-ALIGN-NEXT:    bx lr
+;
+; CHECK-V6M-LABEL: or_tree_with_shifts_i64:
+; CHECK-V6M:       @ %bb.0:
+; CHECK-V6M-NEXT:    push {r4, lr}
+; CHECK-V6M-NEXT:    lsrs r4, r2, #16
+; CHECK-V6M-NEXT:    lsls r3, r3, #16
+; CHECK-V6M-NEXT:    adds r3, r3, r4
+; CHECK-V6M-NEXT:    ldr r4, [sp, #16]
+; CHECK-V6M-NEXT:    lsls r4, r4, #16
+; CHECK-V6M-NEXT:    orrs r1, r4
+; CHECK-V6M-NEXT:    orrs r1, r3
+; CHECK-V6M-NEXT:    ldr r3, [sp, #8]
+; CHECK-V6M-NEXT:    orrs r1, r3
+; CHECK-V6M-NEXT:    lsls r2, r2, #16
+; CHECK-V6M-NEXT:    orrs r0, r2
+; CHECK-V6M-NEXT:    pop {r4, pc}
+  %b.shifted = shl i64 %b, 16
+  %c.shifted = shl i64 %c, 32
+  %d.shifted = shl i64 %d, 48
+  %or.ad = or i64 %a, %d.shifted
+  %or.adb = or i64 %or.ad, %b.shifted
+  %or.adbc = or i64 %or.adb, %c.shifted
+  ret i64 %or.adbc
+}
+
+define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-ARM-LABEL: or_tree_with_shifts_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    orr r2, r3, r2, lsl #16
+; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl #16
+; CHECK-ARM-NEXT:    orr r0, r0, r2
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: or_tree_with_shifts_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    orr r2, r3, r2, lsl #16
+; CHECK-BE-NEXT:    orr r0, r1, r0, lsl #16
+; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    bx lr
+;
+; CHECK-THUMB-LABEL: or_tree_with_shifts_i32:
+; CHECK-THUMB:       @ %bb.0:
+; CHECK-THUMB-NEXT:    orr.w r2, r3, r2, lsl #16
+; CHECK-THUMB-NEXT:    orr.w r0, r1, r0, lsl #16
+; CHECK-THUMB-NEXT:    orrs r0, r2
+; CHECK-THUMB-NEXT:    bx lr
+;
+; CHECK-ALIGN-LABEL: or_tree_with_shifts_i32:
+; CHECK-ALIGN:       @ %bb.0:
+; CHECK-ALIGN-NEXT:    orr.w r2, r3, r2, lsl #16
+; CHECK-ALIGN-NEXT:    orr.w r0, r1, r0, lsl #16
+; CHECK-ALIGN-NEXT:    orrs r0, r2
+; CHECK-ALIGN-NEXT:    bx lr
+;
+; CHECK-V6M-LABEL: or_tree_with_shifts_i32:
+; CHECK-V6M:       @ %bb.0:
+; CHECK-V6M-NEXT:    lsls r2, r2, #16
+; CHECK-V6M-NEXT:    orrs r2, r3
+; CHECK-V6M-NEXT:    lsls r0, r0, #16
+; CHECK-V6M-NEXT:    orrs r0, r1
+; CHECK-V6M-NEXT:    orrs r0, r2
+; CHECK-V6M-NEXT:    bx lr
+  %a.shifted = shl i32 %a, 16
+  %c.shifted = shl i32 %c, 16
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-ARM-LABEL: xor_tree_with_shifts_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    eor r2, r3, r2, lsr #16
+; CHECK-ARM-NEXT:    eor r0, r1, r0, lsr #16
+; CHECK-ARM-NEXT:    eor r0, r0, r2
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: xor_tree_with_shifts_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    eor r2, r3, r2, lsr #16
+; CHECK-BE-NEXT:    eor r0, r1, r0, lsr #16
+; CHECK-BE-NEXT:    eor r0, r0, r2
+; CHECK-BE-NEXT:    bx lr
+;
+; CHECK-THUMB-LABEL: xor_tree_with_shifts_i32:
+; CHECK-THUMB:       @ %bb.0:
+; CHECK-THUMB-NEXT:    eor.w r2, r3, r2, lsr #16
+; CHECK-THUMB-NEXT:    eor.w r0, r1, r0, lsr #16
+; CHECK-THUMB-NEXT:    eors r0, r2
+; CHECK-THUMB-NEXT:    bx lr
+;
+; CHECK-ALIGN-LABEL: xor_tree_with_shifts_i32:
+; CHECK-ALIGN:       @ %bb.0:
+; CHECK-ALIGN-NEXT:    eor.w r2, r3, r2, lsr #16
+; CHECK-ALIGN-NEXT:    eor.w r0, r1, r0, lsr #16
+; CHECK-ALIGN-NEXT:    eors r0, r2
+; CHECK-ALIGN-NEXT:    bx lr
+;
+; CHECK-V6M-LABEL: xor_tree_with_shifts_i32:
+; CHECK-V6M:       @ %bb.0:
+; CHECK-V6M-NEXT:    lsrs r2, r2, #16
+; CHECK-V6M-NEXT:    eors r2, r3
+; CHECK-V6M-NEXT:    lsrs r0, r0, #16
+; CHECK-V6M-NEXT:    eors r0, r1
+; CHECK-V6M-NEXT:    eors r0, r2
+; CHECK-V6M-NEXT:    bx lr
+  %a.shifted = lshr i32 %a, 16
+  %c.shifted = lshr i32 %c, 16
+  %xor.ab = xor i32 %a.shifted, %b
+  %xor.cd = xor i32 %c.shifted, %d
+  %r = xor i32 %xor.ab, %xor.cd
+  ret i32 %r
+}
+
+define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-ARM-LABEL: and_tree_with_shifts_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    and r2, r3, r2, asr #16
+; CHECK-ARM-NEXT:    and r0, r1, r0, asr #16
+; CHECK-ARM-NEXT:    and r0, r0, r2
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: and_tree_with_shifts_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    and r2, r3, r2, asr #16
+; CHECK-BE-NEXT:    and r0, r1, r0, asr #16
+; CHECK-BE-NEXT:    and r0, r0, r2
+; CHECK-BE-NEXT:    bx lr
+;
+; CHECK-THUMB-LABEL: and_tree_with_shifts_i32:
+; CHECK-THUMB:       @ %bb.0:
+; CHECK-THUMB-NEXT:    and.w r2, r3, r2, asr #16
+; CHECK-THUMB-NEXT:    and.w r0, r1, r0, asr #16
+; CHECK-THUMB-NEXT:    ands r0, r2
+; CHECK-THUMB-NEXT:    bx lr
+;
+; CHECK-ALIGN-LABEL: and_tree_with_shifts_i32:
+; CHECK-ALIGN:       @ %bb.0:
+; CHECK-ALIGN-NEXT:    and.w r2, r3, r2, asr #16
+; CHECK-ALIGN-NEXT:    and.w r0, r1, r0, asr #16
+; CHECK-ALIGN-NEXT:    ands r0, r2
+; CHECK-ALIGN-NEXT:    bx lr
+;
+; CHECK-V6M-LABEL: and_tree_with_shifts_i32:
+; CHECK-V6M:       @ %bb.0:
+; CHECK-V6M-NEXT:    asrs r2, r2, #16
+; CHECK-V6M-NEXT:    ands r2, r3
+; CHECK-V6M-NEXT:    asrs r0, r0, #16
+; CHECK-V6M-NEXT:    ands r0, r1
+; CHECK-V6M-NEXT:    ands r0, r2
+; CHECK-V6M-NEXT:    bx lr
+  %a.shifted = ashr i32 %a, 16
+  %c.shifted = ashr i32 %c, 16
+  %and.ab = and i32 %a.shifted, %b
+  %and.cd = and i32 %c.shifted, %d
+  %r = and i32 %and.ab, %and.cd
+  ret i32 %r
+}
+
+define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) {
+; CHECK-ARM-LABEL: logic_tree_with_shifts_var_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    ldr r12, [sp]
+; CHECK-ARM-NEXT:    orr r2, r3, r2, lsl r12
+; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl r12
+; CHECK-ARM-NEXT:    orr r0, r0, r2
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: logic_tree_with_shifts_var_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldr r12, [sp]
+; CHECK-BE-NEXT:    orr r2, r3, r2, lsl r12
+; CHECK-BE-NEXT:    orr r0, r1, r0, lsl r12
+; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    bx lr
+;
+; CHECK-THUMB-LABEL: logic_tree_with_shifts_var_i32:
+; CHECK-THUMB:       @ %bb.0:
+; CHECK-THUMB-NEXT:    ldr.w r12, [sp]
+; CHECK-THUMB-NEXT:    lsl.w r2, r2, r12
+; CHECK-THUMB-NEXT:    lsl.w r0, r0, r12
+; CHECK-THUMB-NEXT:    orrs r2, r3
+; CHECK-THUMB-NEXT:    orrs r0, r1
+; CHECK-THUMB-NEXT:    orrs r0, r2
+; CHECK-THUMB-NEXT:    bx lr
+;
+; CHECK-ALIGN-LABEL: logic_tree_with_shifts_var_i32:
+; CHECK-ALIGN:       @ %bb.0:
+; CHECK-ALIGN-NEXT:    ldr.w r12, [sp]
+; CHECK-ALIGN-NEXT:    lsl.w r2, r2, r12
+; CHECK-ALIGN-NEXT:    lsl.w r0, r0, r12
+; CHECK-ALIGN-NEXT:    orrs r2, r3
+; CHECK-ALIGN-NEXT:    orrs r0, r1
+; CHECK-ALIGN-NEXT:    orrs r0, r2
+; CHECK-ALIGN-NEXT:    bx lr
+;
+; CHECK-V6M-LABEL: logic_tree_with_shifts_var_i32:
+; CHECK-V6M:       @ %bb.0:
+; CHECK-V6M-NEXT:    push {r4, lr}
+; CHECK-V6M-NEXT:    ldr r4, [sp, #8]
+; CHECK-V6M-NEXT:    lsls r2, r4
+; CHECK-V6M-NEXT:    orrs r2, r3
+; CHECK-V6M-NEXT:    lsls r0, r4
+; CHECK-V6M-NEXT:    orrs r0, r1
+; CHECK-V6M-NEXT:    orrs r0, r2
+; CHECK-V6M-NEXT:    pop {r4, pc}
+  %a.shifted = shl i32 %a, %s
+  %c.shifted = shl i32 %c, %s
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define i32 @logic_tree_with_mismatching_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-ARM-LABEL: logic_tree_with_mismatching_shifts_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    orr r2, r3, r2, lsl #16
+; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl #15
+; CHECK-ARM-NEXT:    orr r0, r0, r2
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: logic_tree_with_mismatching_shifts_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    orr r2, r3, r2, lsl #16
+; CHECK-BE-NEXT:    orr r0, r1, r0, lsl #15
+; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    bx lr
+;
+; CHECK-THUMB-LABEL: logic_tree_with_mismatching_shifts_i32:
+; CHECK-THUMB:       @ %bb.0:
+; CHECK-THUMB-NEXT:    orr.w r2, r3, r2, lsl #16
+; CHECK-THUMB-NEXT:    orr.w r0, r1, r0, lsl #15
+; CHECK-THUMB-NEXT:    orrs r0, r2
+; CHECK-THUMB-NEXT:    bx lr
+;
+; CHECK-ALIGN-LABEL: logic_tree_with_mismatching_shifts_i32:
+; CHECK-ALIGN:       @ %bb.0:
+; CHECK-ALIGN-NEXT:    orr.w r2, r3, r2, lsl #16
+; CHECK-ALIGN-NEXT:    orr.w r0, r1, r0, lsl #15
+; CHECK-ALIGN-NEXT:    orrs r0, r2
+; CHECK-ALIGN-NEXT:    bx lr
+;
+; CHECK-V6M-LABEL: logic_tree_with_mismatching_shifts_i32:
+; CHECK-V6M:       @ %bb.0:
+; CHECK-V6M-NEXT:    lsls r2, r2, #16
+; CHECK-V6M-NEXT:    orrs r2, r3
+; CHECK-V6M-NEXT:    lsls r0, r0, #15
+; CHECK-V6M-NEXT:    orrs r0, r1
+; CHECK-V6M-NEXT:    orrs r0, r2
+; CHECK-V6M-NEXT:    bx lr
+  %a.shifted = shl i32 %a, 15
+  %c.shifted = shl i32 %c, 16
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-ARM-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    orr r2, r3, r2, lsr #16
+; CHECK-ARM-NEXT:    orr r0, r1, r0, lsl #16
+; CHECK-ARM-NEXT:    orr r0, r0, r2
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    orr r2, r3, r2, lsr #16
+; CHECK-BE-NEXT:    orr r0, r1, r0, lsl #16
+; CHECK-BE-NEXT:    orr r0, r0, r2
+; CHECK-BE-NEXT:    bx lr
+;
+; CHECK-THUMB-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; CHECK-THUMB:       @ %bb.0:
+; CHECK-THUMB-NEXT:    orr.w r2, r3, r2, lsr #16
+; CHECK-THUMB-NEXT:    orr.w r0, r1, r0, lsl #16
+; CHECK-THUMB-NEXT:    orrs r0, r2
+; CHECK-THUMB-NEXT:    bx lr
+;
+; CHECK-ALIGN-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; CHECK-ALIGN:       @ %bb.0:
+; CHECK-ALIGN-NEXT:    orr.w r2, r3, r2, lsr #16
+; CHECK-ALIGN-NEXT:    orr.w r0, r1, r0, lsl #16
+; CHECK-ALIGN-NEXT:    orrs r0, r2
+; CHECK-ALIGN-NEXT:    bx lr
+;
+; CHECK-V6M-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; CHECK-V6M:       @ %bb.0:
+; CHECK-V6M-NEXT:    lsrs r2, r2, #16
+; CHECK-V6M-NEXT:    orrs r2, r3
+; CHECK-V6M-NEXT:    lsls r0, r0, #16
+; CHECK-V6M-NEXT:    orrs r0, r1
+; CHECK-V6M-NEXT:    orrs r0, r2
+; CHECK-V6M-NEXT:    bx lr
+  %a.shifted = shl i32 %a, 16
+  %c.shifted = lshr i32 %c, 16
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-ARM-LABEL: or_tree_with_shifts_vec_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    vshl.i32 q8, q2, #16
+; CHECK-ARM-NEXT:    vshl.i32 q9, q0, #16
+; CHECK-ARM-NEXT:    vorr q8, q8, q3
+; CHECK-ARM-NEXT:    vorr q9, q9, q1
+; CHECK-ARM-NEXT:    vorr q0, q9, q8
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: or_tree_with_shifts_vec_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    vrev64.32 q8, q2
+; CHECK-BE-NEXT:    vrev64.32 q9, q0
+; CHECK-BE-NEXT:    vshl.i32 q8, q8, #16
+; CHECK-BE-NEXT:    vrev64.32 q10, q3
+; CHECK-BE-NEXT:    vshl.i32 q9, q9, #16
+; CHECK-BE-NEXT:    vrev64.32 q11, q1
+; CHECK-BE-NEXT:    vorr q8, q8, q10
+; CHECK-BE-NEXT:    vorr q9, q9, q11
+; CHECK-BE-NEXT:    vorr q8, q9, q8
+; CHECK-BE-NEXT:    vrev64.32 q0, q8
+; CHECK-BE-NEXT:    bx lr
+  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+  %c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
+  %or.ab = or <4 x i32> %a.shifted, %b
+  %or.cd = or <4 x i32> %c.shifted, %d
+  %r = or <4 x i32> %or.ab, %or.cd
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-ARM-LABEL: or_tree_with_mismatching_shifts_vec_i32:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    vshl.i32 q8, q2, #17
+; CHECK-ARM-NEXT:    vshl.i32 q9, q0, #16
+; CHECK-ARM-NEXT:    vorr q8, q8, q3
+; CHECK-ARM-NEXT:    vorr q9, q9, q1
+; CHECK-ARM-NEXT:    vorr q0, q9, q8
+; CHECK-ARM-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: or_tree_with_mismatching_shifts_vec_i32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    vrev64.32 q8, q2
+; CHECK-BE-NEXT:    vrev64.32 q9, q0
+; CHECK-BE-NEXT:    vshl.i32 q8, q8, #17
+; CHECK-BE-NEXT:    vrev64.32 q10, q3
+; CHECK-BE-NEXT:    vshl.i32 q9, q9, #16
+; CHECK-BE-NEXT:    vrev64.32 q11, q1
+; CHECK-BE-NEXT:    vorr q8, q8, q10
+; CHECK-BE-NEXT:    vorr q9, q9, q11
+; CHECK-BE-NEXT:    vorr q8, q9, q8
+; CHECK-BE-NEXT:    vrev64.32 q0, q8
+; CHECK-BE-NEXT:    bx lr
+  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+  %c.shifted = shl <4 x i32> %c, <i32 17, i32 17, i32 17, i32 17>
+  %or.ab = or <4 x i32> %a.shifted, %b
+  %or.cd = or <4 x i32> %c.shifted, %d
+  %r = or <4 x i32> %or.ab, %or.cd
+  ret <4 x i32> %r
+}
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 8f51cfd..5bfaa2f 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -506,3 +506,208 @@ define <4 x i32> @ashr_add_neg_shl_v4i8(<4 x i32> %r) nounwind {
   %conv1 = ashr <4 x i32> %sext,
   ret <4 x i32> %conv1
 }
+
+define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; X32-LABEL: or_tree_with_shifts_i32:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    shll $16, %ecx
+; X32-NEXT:    shll $16, %eax
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    orl %ecx, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: or_tree_with_shifts_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    shll $16, %edi
+; X64-NEXT:    shll $16, %eax
+; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    orl %edi, %eax
+; X64-NEXT:    retq
+  %a.shifted = shl i32 %a, 16
+  %c.shifted = shl i32 %c, 16
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; X32-LABEL: xor_tree_with_shifts_i32:
+; X32:       # %bb.0:
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    xorl %ecx, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: xor_tree_with_shifts_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    shrl $16, %edi
+; X64-NEXT:    shrl $16, %eax
+; X64-NEXT:    xorl %ecx, %eax
+; X64-NEXT:    xorl %esi, %eax
+; X64-NEXT:    xorl %edi, %eax
+; X64-NEXT:    retq
+  %a.shifted = lshr i32 %a, 16
+  %c.shifted = lshr i32 %c, 16
+  %xor.ab = xor i32 %a.shifted, %b
+  %xor.cd = xor i32 %c.shifted, %d
+  %r = xor i32 %xor.ab, %xor.cd
+  ret i32 %r
+}
+
+define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; X32-LABEL: and_tree_with_shifts_i32:
+; X32:       # %bb.0:
+; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    andl %ecx, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: and_tree_with_shifts_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    sarl $16, %edi
+; X64-NEXT:    sarl $16, %eax
+; X64-NEXT:    andl %ecx, %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    andl %edi, %eax
+; X64-NEXT:    retq
+  %a.shifted = ashr i32 %a, 16
+  %c.shifted = ashr i32 %c, 16
+  %and.ab = and i32 %a.shifted, %b
+  %and.cd = and i32 %c.shifted, %d
+  %r = and i32 %and.ab, %and.cd
+  ret i32 %r
+}
+
+define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) {
+; X32-LABEL: logic_tree_with_shifts_var_i32:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    shll %cl, %edx
+; X32-NEXT:    shll %cl, %eax
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    orl %edx, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: logic_tree_with_shifts_var_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shll %cl, %edi
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    orl %edx, %eax
+; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    orl %edi, %eax
+; X64-NEXT:    retq
+  %a.shifted = shl i32 %a, %s
+  %c.shifted = shl i32 %c, %s
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define i32 @logic_tree_with_mismatching_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; X32-LABEL: logic_tree_with_mismatching_shifts_i32:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    shll $15, %ecx
+; X32-NEXT:    shll $16, %eax
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    orl %ecx, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: logic_tree_with_mismatching_shifts_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    shll $15, %edi
+; X64-NEXT:    shll $16, %eax
+; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    orl %edi, %eax
+; X64-NEXT:    retq
+  %a.shifted = shl i32 %a, 15
+  %c.shifted = shl i32 %c, 16
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
+; X32-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; X32:       # %bb.0:
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    shll $16, %ecx
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    orl %ecx, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    shll $16, %edi
+; X64-NEXT:    shrl $16, %eax
+; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    orl %edi, %eax
+; X64-NEXT:    retq
+  %a.shifted = shl i32 %a, 16
+  %c.shifted = lshr i32 %c, 16
+  %or.ab = or i32 %a.shifted, %b
+  %or.cd = or i32 %c.shifted, %d
+  %r = or i32 %or.ab, %or.cd
+  ret i32 %r
+}
+
+define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; X64-LABEL: or_tree_with_shifts_vec_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    pslld $16, %xmm0
+; X64-NEXT:    pslld $16, %xmm2
+; X64-NEXT:    por %xmm3, %xmm2
+; X64-NEXT:    por %xmm1, %xmm2
+; X64-NEXT:    por %xmm2, %xmm0
+; X64-NEXT:    retq
+  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+  %c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
+  %or.ab = or <4 x i32> %a.shifted, %b
+  %or.cd = or <4 x i32> %c.shifted, %d
+  %r = or <4 x i32> %or.ab, %or.cd
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; X64-LABEL: or_tree_with_mismatching_shifts_vec_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    pslld $16, %xmm0
+; X64-NEXT:    pslld $17, %xmm2
+; X64-NEXT:    por %xmm3, %xmm2
+; X64-NEXT:    por %xmm1, %xmm2
+; X64-NEXT:    por %xmm2, %xmm0
+; X64-NEXT:    retq
+  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+  %c.shifted = shl <4 x i32> %c, <i32 17, i32 17, i32 17, i32 17>
+  %or.ab = or <4 x i32> %a.shifted, %b
+  %or.cd = or <4 x i32> %c.shifted, %d
+  %r = or <4 x i32> %or.ab, %or.cd
+  ret <4 x i32> %r
+}
-- 
2.7.4