From 110c5442b885625c28a14f17b60713624b3bba55 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 24 Dec 2022 18:39:57 +0300 Subject: [PATCH] [NFC][Codegen] Add tests with oversized shifts by non-byte-multiple --- .../AArch64/wide-scalar-shift-legalization.ll | 342 + .../PowerPC/wide-scalar-shift-legalization.ll | 2407 ++++++ .../RISCV/wide-scalar-shift-legalization.ll | 4168 +++++++++++ .../CodeGen/X86/wide-scalar-shift-legalization.ll | 7825 ++++++++++++++++++++ 4 files changed, 14742 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll create mode 100644 llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll create mode 100644 llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll create mode 100644 llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll new file mode 100644 index 0000000..c9caa58 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll @@ -0,0 +1,342 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=ALL + +define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: lshr_4bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr w8, [x1] +; ALL-NEXT: ldr w9, [x0] +; ALL-NEXT: lsr w8, w9, w8 +; ALL-NEXT: str w8, [x2] +; ALL-NEXT: ret + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = lshr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: shl_4bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr w8, [x1] +; ALL-NEXT: ldr w9, [x0] +; ALL-NEXT: lsl w8, w9, w8 +; ALL-NEXT: str w8, [x2] +; ALL-NEXT: ret + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = shl i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: ashr_4bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr w8, [x1] +; ALL-NEXT: ldr w9, [x0] +; ALL-NEXT: asr w8, w9, w8 +; ALL-NEXT: str w8, [x2] +; ALL-NEXT: ret + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = ashr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: lshr_8bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x8, [x1] +; ALL-NEXT: ldr x9, [x0] +; ALL-NEXT: lsr x8, x9, x8 +; ALL-NEXT: str x8, [x2] +; ALL-NEXT: ret + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = lshr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: shl_8bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x8, [x1] +; ALL-NEXT: ldr x9, [x0] +; ALL-NEXT: lsl x8, x9, x8 +; ALL-NEXT: str x8, [x2] +; ALL-NEXT: ret + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = shl i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: ashr_8bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x8, [x1] +; ALL-NEXT: ldr x9, [x0] +; ALL-NEXT: asr x8, x9, x8 +; ALL-NEXT: str x8, [x2] +; ALL-NEXT: ret + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = ashr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: lshr_16bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x8, [x1] +; ALL-NEXT: ldp x10, x9, [x0] +; ALL-NEXT: mvn w11, w8 +; ALL-NEXT: tst x8, #0x40 +; ALL-NEXT: lsr x10, x10, x8 +; ALL-NEXT: lsl x12, x9, #1 +; ALL-NEXT: lsr x9, x9, x8 +; ALL-NEXT: lsl x11, x12, x11 +; ALL-NEXT: orr x8, x11, x10 +; ALL-NEXT: csel x10, xzr, x9, ne +; ALL-NEXT: csel x8, x9, x8, ne +; ALL-NEXT: stp x8, x10, [x2] +; ALL-NEXT: ret + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = lshr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: shl_16bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x8, [x1] +; ALL-NEXT: ldp x9, x10, [x0] +; ALL-NEXT: mvn w11, w8 +; ALL-NEXT: tst x8, #0x40 +; ALL-NEXT: lsr x12, x9, #1 +; ALL-NEXT: lsl x9, x9, x8 +; ALL-NEXT: lsl x10, x10, x8 +; ALL-NEXT: lsr x11, x12, x11 +; ALL-NEXT: orr x8, x10, x11 +; ALL-NEXT: csel x10, xzr, x9, ne +; ALL-NEXT: csel x8, x9, x8, ne +; ALL-NEXT: stp x10, x8, [x2] +; ALL-NEXT: ret + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = shl i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: ashr_16bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x8, [x1] +; ALL-NEXT: ldp x10, x9, [x0] +; ALL-NEXT: mvn w11, w8 +; ALL-NEXT: tst x8, #0x40 +; ALL-NEXT: lsr x10, x10, x8 +; ALL-NEXT: lsl x12, x9, #1 +; ALL-NEXT: lsl x11, x12, x11 +; ALL-NEXT: asr x12, x9, x8 +; ALL-NEXT: asr x8, x9, #63 +; ALL-NEXT: orr x9, x11, x10 +; ALL-NEXT: csel x8, x8, x12, ne +; ALL-NEXT: csel x9, x12, x9, ne +; ALL-NEXT: stp x9, x8, [x2] +; ALL-NEXT: ret + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = ashr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: lshr_32bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x9, [x1] +; ALL-NEXT: mov w8, #128 +; ALL-NEXT: ldp x10, x11, [x0, #16] +; ALL-NEXT: sub x8, x8, x9 +; ALL-NEXT: mvn w16, w9 +; ALL-NEXT: ldp x13, x12, [x0] +; ALL-NEXT: mvn w0, w8 +; ALL-NEXT: lsr x14, x10, #1 +; ALL-NEXT: lsl x1, x11, x8 +; ALL-NEXT: tst x8, #0x40 +; ALL-NEXT: lsl x8, x10, x8 +; ALL-NEXT: lsl x17, x11, #1 +; ALL-NEXT: lsr x14, x14, x0 +; ALL-NEXT: csel x0, xzr, x8, ne +; ALL-NEXT: orr x14, x1, x14 +; ALL-NEXT: lsr x15, x10, x9 +; ALL-NEXT: csel x8, x8, x14, ne +; ALL-NEXT: lsl x14, x12, #1 +; ALL-NEXT: lsl x3, x17, x16 +; ALL-NEXT: lsr x1, x13, x9 +; ALL-NEXT: lsl x14, x14, x16 +; ALL-NEXT: lsr x18, x11, x9 +; ALL-NEXT: orr x15, x3, x15 +; ALL-NEXT: tst x9, #0x40 +; ALL-NEXT: orr x14, x14, x1 +; ALL-NEXT: lsr x16, x12, x9 +; ALL-NEXT: csel x15, x18, x15, ne +; ALL-NEXT: csel x14, x16, x14, ne +; ALL-NEXT: csel x16, xzr, x16, ne +; ALL-NEXT: csel x18, xzr, x18, ne +; ALL-NEXT: subs x1, x9, #128 +; ALL-NEXT: orr x14, x14, x0 +; ALL-NEXT: mvn w3, w1 +; ALL-NEXT: orr x8, x16, x8 +; ALL-NEXT: lsr x10, x10, x1 +; ALL-NEXT: lsr x11, x11, x1 +; ALL-NEXT: lsl x17, x17, x3 +; ALL-NEXT: orr x10, x17, x10 +; ALL-NEXT: csel x17, x18, xzr, lo +; ALL-NEXT: tst x1, #0x40 +; ALL-NEXT: csel x10, x11, x10, ne +; ALL-NEXT: csel x11, xzr, x11, ne +; ALL-NEXT: cmp x9, #128 +; ALL-NEXT: csel x10, x14, x10, lo +; ALL-NEXT: csel x14, x15, xzr, lo +; ALL-NEXT: csel x8, x8, x11, lo +; ALL-NEXT: cmp x9, #0 +; ALL-NEXT: csel x9, x13, x10, eq +; ALL-NEXT: csel x8, x12, x8, eq +; ALL-NEXT: stp x14, x17, [x2, #16] +; ALL-NEXT: stp x9, x8, [x2] +; ALL-NEXT: ret + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = lshr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: shl_32bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x9, [x1] +; ALL-NEXT: mov w8, #128 +; ALL-NEXT: ldp x11, x10, [x0] +; ALL-NEXT: sub x8, x8, x9 +; ALL-NEXT: mvn w16, w9 +; ALL-NEXT: ldp x12, x13, [x0, #16] +; ALL-NEXT: mvn w0, w8 +; ALL-NEXT: tst x8, #0x40 +; ALL-NEXT: lsl x14, x10, #1 +; ALL-NEXT: lsr x1, x11, x8 +; ALL-NEXT: lsr x8, x10, x8 +; ALL-NEXT: lsr x17, x11, #1 +; ALL-NEXT: lsl x14, x14, x0 +; ALL-NEXT: csel x0, xzr, x8, ne +; ALL-NEXT: orr x14, x14, x1 +; ALL-NEXT: lsl x15, x10, x9 +; ALL-NEXT: csel x8, x8, x14, ne +; ALL-NEXT: lsr x14, x12, #1 +; ALL-NEXT: lsr x3, x17, x16 +; ALL-NEXT: lsl x1, x13, x9 +; ALL-NEXT: lsr x14, x14, x16 +; ALL-NEXT: lsl x18, x11, x9 +; ALL-NEXT: orr x15, x15, x3 +; ALL-NEXT: tst x9, #0x40 +; ALL-NEXT: orr x14, x1, x14 +; ALL-NEXT: lsl x16, x12, x9 +; ALL-NEXT: csel x15, x18, x15, ne +; ALL-NEXT: csel x14, x16, x14, ne +; ALL-NEXT: csel x16, xzr, x16, ne +; ALL-NEXT: csel x18, xzr, x18, ne +; ALL-NEXT: subs x1, x9, #128 +; ALL-NEXT: orr x14, x14, x0 +; ALL-NEXT: mvn w3, w1 +; ALL-NEXT: orr x8, x16, x8 +; ALL-NEXT: lsl x10, x10, x1 +; ALL-NEXT: lsl x11, x11, x1 +; ALL-NEXT: lsr x17, x17, x3 +; ALL-NEXT: orr x10, x10, x17 +; ALL-NEXT: csel x17, x18, xzr, lo +; ALL-NEXT: tst x1, #0x40 +; ALL-NEXT: csel x10, x11, x10, ne +; ALL-NEXT: csel x11, xzr, x11, ne +; ALL-NEXT: cmp x9, #128 +; ALL-NEXT: csel x10, x14, x10, lo +; ALL-NEXT: csel x14, x15, xzr, lo +; ALL-NEXT: csel x8, x8, x11, lo +; ALL-NEXT: cmp x9, #0 +; ALL-NEXT: csel x9, x13, x10, eq +; ALL-NEXT: csel x8, x12, x8, eq +; ALL-NEXT: stp x17, x14, [x2] +; ALL-NEXT: stp x8, x9, [x2, #16] +; ALL-NEXT: ret + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = shl i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: ashr_32bytes: +; ALL: // %bb.0: +; ALL-NEXT: ldr x9, [x1] +; ALL-NEXT: mov w8, #128 +; ALL-NEXT: ldp x11, x10, [x0, #8] +; ALL-NEXT: sub x8, x8, x9 +; ALL-NEXT: ldr x13, [x0, #24] +; ALL-NEXT: mvn w18, w8 +; ALL-NEXT: ldr x12, [x0] +; ALL-NEXT: mvn w16, w9 +; ALL-NEXT: tst x8, #0x40 +; ALL-NEXT: lsr x14, x10, #1 +; ALL-NEXT: lsl x1, x13, x8 +; ALL-NEXT: lsr x14, x14, x18 +; ALL-NEXT: lsl x8, x10, x8 +; ALL-NEXT: orr x14, x1, x14 +; ALL-NEXT: lsl x17, x13, #1 +; ALL-NEXT: csel x18, xzr, x8, ne +; ALL-NEXT: csel x8, x8, x14, ne +; ALL-NEXT: lsl x14, x11, #1 +; ALL-NEXT: lsr x15, x10, x9 +; ALL-NEXT: lsl x3, x17, x16 +; ALL-NEXT: lsr x1, x12, x9 +; ALL-NEXT: lsl x14, x14, x16 +; ALL-NEXT: asr x0, x13, x9 +; ALL-NEXT: orr x15, x3, x15 +; ALL-NEXT: tst x9, #0x40 +; ALL-NEXT: orr x14, x14, x1 +; ALL-NEXT: lsr x16, x11, x9 +; ALL-NEXT: asr x1, x13, #63 +; ALL-NEXT: csel x15, x0, x15, ne +; ALL-NEXT: csel x14, x16, x14, ne +; ALL-NEXT: csel x16, xzr, x16, ne +; ALL-NEXT: csel x0, x1, x0, ne +; ALL-NEXT: subs x3, x9, #128 +; ALL-NEXT: mvn w4, w3 +; ALL-NEXT: orr x14, x14, x18 +; ALL-NEXT: orr x8, x16, x8 +; ALL-NEXT: lsr x10, x10, x3 +; ALL-NEXT: asr x13, x13, x3 +; ALL-NEXT: lsl x17, x17, x4 +; ALL-NEXT: orr x10, x17, x10 +; ALL-NEXT: csel x17, x0, x1, lo +; ALL-NEXT: tst x3, #0x40 +; ALL-NEXT: csel x10, x13, x10, ne +; ALL-NEXT: csel x13, x1, x13, ne +; ALL-NEXT: cmp x9, #128 +; ALL-NEXT: csel x10, x14, x10, lo +; ALL-NEXT: csel x14, x15, x1, lo +; ALL-NEXT: csel x8, x8, x13, lo +; ALL-NEXT: cmp x9, #0 +; ALL-NEXT: csel x9, x12, x10, eq +; ALL-NEXT: csel x8, x11, x8, eq +; ALL-NEXT: stp x14, x17, [x2, #16] +; ALL-NEXT: stp x9, x8, [x2] +; ALL-NEXT: ret + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = ashr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll new file mode 100644 index 0000000..92d582e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll @@ -0,0 +1,2407 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,LE,LE-64BIT +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BE +; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=ALL,LE,LE-32BIT + +define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: lshr_4bytes: +; ALL: # %bb.0: +; ALL-NEXT: lwz 3, 0(3) +; ALL-NEXT: lwz 4, 0(4) +; ALL-NEXT: srw 3, 3, 4 +; ALL-NEXT: stw 3, 0(5) +; ALL-NEXT: blr + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = lshr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: shl_4bytes: +; ALL: # %bb.0: +; ALL-NEXT: lwz 3, 0(3) +; ALL-NEXT: lwz 4, 0(4) +; ALL-NEXT: slw 3, 3, 4 +; ALL-NEXT: stw 3, 0(5) +; ALL-NEXT: blr + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = shl i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; ALL-LABEL: ashr_4bytes: +; ALL: # %bb.0: +; ALL-NEXT: lwz 3, 0(3) +; ALL-NEXT: lwz 4, 0(4) +; ALL-NEXT: sraw 3, 3, 4 +; ALL-NEXT: stw 3, 0(5) +; ALL-NEXT: blr + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = ashr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: lshr_8bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: ld 3, 0(3) +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: srd 3, 3, 4 +; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: lshr_8bytes: +; BE: # %bb.0: +; BE-NEXT: ld 3, 0(3) +; BE-NEXT: lwz 4, 4(4) +; BE-NEXT: srd 3, 3, 4 +; BE-NEXT: std 3, 0(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: lshr_8bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: lwz 4, 4(4) +; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: lwz 3, 0(3) +; LE-32BIT-NEXT: subfic 7, 4, 32 +; LE-32BIT-NEXT: srw 6, 6, 4 +; LE-32BIT-NEXT: addi 8, 4, -32 +; LE-32BIT-NEXT: slw 7, 3, 7 +; LE-32BIT-NEXT: srw 4, 3, 4 +; LE-32BIT-NEXT: srw 3, 3, 8 +; LE-32BIT-NEXT: or 6, 6, 7 +; LE-32BIT-NEXT: or 3, 6, 3 +; LE-32BIT-NEXT: stw 4, 0(5) +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: blr + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = lshr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: shl_8bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: ld 3, 0(3) +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: sld 3, 3, 4 +; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: shl_8bytes: +; BE: # %bb.0: +; BE-NEXT: ld 3, 0(3) +; BE-NEXT: lwz 4, 4(4) +; BE-NEXT: sld 3, 3, 4 +; BE-NEXT: std 3, 0(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: shl_8bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: lwz 4, 4(4) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 3, 4(3) +; LE-32BIT-NEXT: subfic 7, 4, 32 +; LE-32BIT-NEXT: slw 6, 6, 4 +; LE-32BIT-NEXT: addi 8, 4, -32 +; LE-32BIT-NEXT: srw 7, 3, 7 +; LE-32BIT-NEXT: slw 4, 3, 4 +; LE-32BIT-NEXT: slw 3, 3, 8 +; LE-32BIT-NEXT: or 6, 6, 7 +; LE-32BIT-NEXT: or 3, 6, 3 +; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: stw 3, 0(5) +; LE-32BIT-NEXT: blr + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = shl i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: ashr_8bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: ld 3, 0(3) +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: srad 3, 3, 4 +; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: ashr_8bytes: +; BE: # %bb.0: +; BE-NEXT: ld 3, 0(3) +; BE-NEXT: lwz 4, 4(4) +; BE-NEXT: srad 3, 3, 4 +; BE-NEXT: std 3, 0(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: ashr_8bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: lwz 4, 4(4) +; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: lwz 3, 0(3) +; LE-32BIT-NEXT: subfic 7, 4, 32 +; LE-32BIT-NEXT: srw 6, 6, 4 +; LE-32BIT-NEXT: addi 8, 4, -32 +; LE-32BIT-NEXT: slw 7, 3, 7 +; LE-32BIT-NEXT: sraw 4, 3, 4 +; LE-32BIT-NEXT: sraw 3, 3, 8 +; LE-32BIT-NEXT: cmpwi 8, 1 +; LE-32BIT-NEXT: or 6, 6, 7 +; LE-32BIT-NEXT: bc 12, 0, .LBB5_1 +; LE-32BIT-NEXT: b .LBB5_2 +; LE-32BIT-NEXT: .LBB5_1: +; LE-32BIT-NEXT: addi 3, 6, 0 +; LE-32BIT-NEXT: .LBB5_2: +; LE-32BIT-NEXT: stw 4, 0(5) +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: blr + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = ashr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: lshr_16bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: ld 6, 0(3) +; LE-64BIT-NEXT: ld 3, 8(3) +; LE-64BIT-NEXT: subfic 7, 4, 64 +; LE-64BIT-NEXT: srd 6, 6, 4 +; LE-64BIT-NEXT: addi 8, 4, -64 +; LE-64BIT-NEXT: sld 7, 3, 7 +; LE-64BIT-NEXT: or 6, 6, 7 +; LE-64BIT-NEXT: srd 7, 3, 8 +; LE-64BIT-NEXT: or 6, 6, 7 +; LE-64BIT-NEXT: srd 3, 3, 4 +; LE-64BIT-NEXT: std 3, 8(5) +; LE-64BIT-NEXT: std 6, 0(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: lshr_16bytes: +; BE: # %bb.0: +; BE-NEXT: lwz 4, 12(4) +; BE-NEXT: ld 6, 0(3) +; BE-NEXT: ld 3, 8(3) +; BE-NEXT: subfic 7, 4, 64 +; BE-NEXT: srd 3, 3, 4 +; BE-NEXT: sld 7, 6, 7 +; BE-NEXT: addi 8, 4, -64 +; BE-NEXT: or 3, 3, 7 +; BE-NEXT: srd 7, 6, 8 +; BE-NEXT: srd 4, 6, 4 +; BE-NEXT: or 3, 3, 7 +; BE-NEXT: std 4, 0(5) +; BE-NEXT: std 3, 8(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: lshr_16bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: lwz 4, 12(4) +; LE-32BIT-NEXT: li 8, 0 +; LE-32BIT-NEXT: lwz 6, 8(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: subfic 10, 4, 96 +; LE-32BIT-NEXT: lwz 9, 4(3) +; LE-32BIT-NEXT: addi 11, 4, -64 +; LE-32BIT-NEXT: lwz 3, 0(3) +; LE-32BIT-NEXT: cmplwi 4, 64 +; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 27, 9, 11 +; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 28, 3, 4 +; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: subfic 30, 4, 32 +; LE-32BIT-NEXT: slw 10, 3, 10 +; LE-32BIT-NEXT: addi 12, 4, -96 +; LE-32BIT-NEXT: srw 0, 7, 4 +; LE-32BIT-NEXT: or 10, 27, 10 +; LE-32BIT-NEXT: slw 27, 6, 30 +; LE-32BIT-NEXT: bc 12, 0, .LBB6_2 +; LE-32BIT-NEXT: # %bb.1: +; LE-32BIT-NEXT: ori 28, 8, 0 +; LE-32BIT-NEXT: b .LBB6_2 +; LE-32BIT-NEXT: .LBB6_2: +; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 29, 9, 4 +; LE-32BIT-NEXT: or 0, 0, 27 +; LE-32BIT-NEXT: slw 27, 3, 30 +; LE-32BIT-NEXT: stw 28, 0(5) +; LE-32BIT-NEXT: subfic 28, 4, 64 +; LE-32BIT-NEXT: srw 12, 3, 12 +; LE-32BIT-NEXT: or 29, 29, 27 +; LE-32BIT-NEXT: addi 27, 4, -32 +; LE-32BIT-NEXT: or 10, 10, 12 +; LE-32BIT-NEXT: subfic 12, 28, 32 +; LE-32BIT-NEXT: slw 30, 9, 30 +; LE-32BIT-NEXT: srw 12, 9, 12 +; LE-32BIT-NEXT: slw 9, 9, 28 +; LE-32BIT-NEXT: slw 28, 3, 28 +; LE-32BIT-NEXT: srw 11, 3, 11 +; LE-32BIT-NEXT: srw 3, 3, 27 +; LE-32BIT-NEXT: srw 27, 6, 27 +; LE-32BIT-NEXT: or 0, 0, 27 +; LE-32BIT-NEXT: or 12, 28, 12 +; LE-32BIT-NEXT: cmplwi 1, 4, 0 +; LE-32BIT-NEXT: srw 4, 6, 4 +; LE-32BIT-NEXT: or 3, 29, 3 +; LE-32BIT-NEXT: or 9, 0, 9 +; LE-32BIT-NEXT: or 12, 12, 30 +; LE-32BIT-NEXT: bc 12, 0, .LBB6_4 +; LE-32BIT-NEXT: # %bb.3: +; LE-32BIT-NEXT: ori 3, 8, 0 +; LE-32BIT-NEXT: ori 8, 10, 0 +; LE-32BIT-NEXT: b .LBB6_5 +; LE-32BIT-NEXT: .LBB6_4: +; LE-32BIT-NEXT: addi 8, 9, 0 +; LE-32BIT-NEXT: .LBB6_5: +; LE-32BIT-NEXT: or 4, 4, 12 +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: bc 12, 6, .LBB6_7 +; LE-32BIT-NEXT: # %bb.6: +; LE-32BIT-NEXT: ori 3, 8, 0 +; LE-32BIT-NEXT: b .LBB6_8 +; LE-32BIT-NEXT: .LBB6_7: +; LE-32BIT-NEXT: addi 3, 7, 0 +; LE-32BIT-NEXT: .LBB6_8: +; LE-32BIT-NEXT: bc 12, 0, .LBB6_10 +; LE-32BIT-NEXT: # %bb.9: +; LE-32BIT-NEXT: ori 4, 11, 0 +; LE-32BIT-NEXT: b .LBB6_10 +; LE-32BIT-NEXT: .LBB6_10: +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: bc 12, 6, .LBB6_12 +; LE-32BIT-NEXT: # %bb.11: +; LE-32BIT-NEXT: ori 3, 4, 0 +; LE-32BIT-NEXT: b .LBB6_13 +; LE-32BIT-NEXT: .LBB6_12: +; LE-32BIT-NEXT: addi 3, 6, 0 +; LE-32BIT-NEXT: .LBB6_13: +; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: blr + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = lshr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: shl_16bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: ld 6, 8(3) +; LE-64BIT-NEXT: ld 3, 0(3) +; LE-64BIT-NEXT: subfic 7, 4, 64 +; LE-64BIT-NEXT: sld 6, 6, 4 +; LE-64BIT-NEXT: addi 8, 4, -64 +; LE-64BIT-NEXT: srd 7, 3, 7 +; LE-64BIT-NEXT: or 6, 6, 7 +; LE-64BIT-NEXT: sld 7, 3, 8 +; LE-64BIT-NEXT: or 6, 6, 7 +; LE-64BIT-NEXT: sld 3, 3, 4 +; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: std 6, 8(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: shl_16bytes: +; BE: # %bb.0: +; BE-NEXT: lwz 4, 12(4) +; BE-NEXT: ld 6, 8(3) +; BE-NEXT: ld 3, 0(3) +; BE-NEXT: subfic 7, 4, 64 +; BE-NEXT: sld 3, 3, 4 +; BE-NEXT: srd 7, 6, 7 +; BE-NEXT: addi 8, 4, -64 +; BE-NEXT: or 3, 3, 7 +; BE-NEXT: sld 7, 6, 8 +; BE-NEXT: sld 4, 6, 4 +; BE-NEXT: or 3, 3, 7 +; BE-NEXT: std 4, 8(5) +; BE-NEXT: std 3, 0(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: shl_16bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: lwz 4, 12(4) +; LE-32BIT-NEXT: li 8, 0 +; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: subfic 10, 4, 96 +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: addi 11, 4, -64 +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: cmplwi 4, 64 +; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 27, 9, 11 +; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 28, 3, 4 +; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: subfic 30, 4, 32 +; LE-32BIT-NEXT: srw 10, 3, 10 +; LE-32BIT-NEXT: addi 12, 4, -96 +; LE-32BIT-NEXT: slw 0, 7, 4 +; LE-32BIT-NEXT: or 10, 27, 10 +; LE-32BIT-NEXT: srw 27, 6, 30 +; LE-32BIT-NEXT: bc 12, 0, .LBB7_2 +; LE-32BIT-NEXT: # %bb.1: +; LE-32BIT-NEXT: ori 28, 8, 0 +; LE-32BIT-NEXT: b .LBB7_2 +; LE-32BIT-NEXT: .LBB7_2: +; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 29, 9, 4 +; LE-32BIT-NEXT: or 0, 0, 27 +; LE-32BIT-NEXT: srw 27, 3, 30 +; LE-32BIT-NEXT: stw 28, 12(5) +; LE-32BIT-NEXT: subfic 28, 4, 64 +; LE-32BIT-NEXT: slw 12, 3, 12 +; LE-32BIT-NEXT: or 29, 29, 27 +; LE-32BIT-NEXT: addi 27, 4, -32 +; LE-32BIT-NEXT: or 10, 10, 12 +; LE-32BIT-NEXT: subfic 12, 28, 32 +; LE-32BIT-NEXT: srw 30, 9, 30 +; LE-32BIT-NEXT: slw 12, 9, 12 +; LE-32BIT-NEXT: srw 9, 9, 28 +; LE-32BIT-NEXT: srw 28, 3, 28 +; LE-32BIT-NEXT: slw 11, 3, 11 +; LE-32BIT-NEXT: slw 3, 3, 27 +; LE-32BIT-NEXT: slw 27, 6, 27 +; LE-32BIT-NEXT: or 0, 0, 27 +; LE-32BIT-NEXT: or 12, 28, 12 +; LE-32BIT-NEXT: cmplwi 1, 4, 0 +; LE-32BIT-NEXT: slw 4, 6, 4 +; LE-32BIT-NEXT: or 3, 29, 3 +; LE-32BIT-NEXT: or 9, 0, 9 +; LE-32BIT-NEXT: or 12, 12, 30 +; LE-32BIT-NEXT: bc 12, 0, .LBB7_4 +; LE-32BIT-NEXT: # %bb.3: +; LE-32BIT-NEXT: ori 3, 8, 0 +; LE-32BIT-NEXT: ori 8, 10, 0 +; LE-32BIT-NEXT: b .LBB7_5 +; LE-32BIT-NEXT: .LBB7_4: +; LE-32BIT-NEXT: addi 8, 9, 0 +; LE-32BIT-NEXT: .LBB7_5: +; LE-32BIT-NEXT: or 4, 4, 12 +; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: bc 12, 6, .LBB7_7 +; LE-32BIT-NEXT: # %bb.6: +; LE-32BIT-NEXT: ori 3, 8, 0 +; LE-32BIT-NEXT: b .LBB7_8 +; LE-32BIT-NEXT: .LBB7_7: +; LE-32BIT-NEXT: addi 3, 7, 0 +; LE-32BIT-NEXT: .LBB7_8: +; LE-32BIT-NEXT: bc 12, 0, .LBB7_10 +; LE-32BIT-NEXT: # %bb.9: +; LE-32BIT-NEXT: ori 4, 11, 0 +; LE-32BIT-NEXT: b .LBB7_10 +; LE-32BIT-NEXT: .LBB7_10: +; LE-32BIT-NEXT: stw 3, 0(5) +; LE-32BIT-NEXT: bc 12, 6, .LBB7_12 +; LE-32BIT-NEXT: # %bb.11: +; LE-32BIT-NEXT: ori 3, 4, 0 +; LE-32BIT-NEXT: b .LBB7_13 +; LE-32BIT-NEXT: .LBB7_12: +; LE-32BIT-NEXT: addi 3, 6, 0 +; LE-32BIT-NEXT: .LBB7_13: +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: blr + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = shl i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: ashr_16bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: ld 6, 0(3) +; LE-64BIT-NEXT: ld 3, 8(3) +; LE-64BIT-NEXT: subfic 7, 4, 64 +; LE-64BIT-NEXT: srd 6, 6, 4 +; LE-64BIT-NEXT: addi 8, 4, -64 +; LE-64BIT-NEXT: sld 7, 3, 7 +; LE-64BIT-NEXT: cmpwi 8, 1 +; LE-64BIT-NEXT: or 6, 6, 7 +; LE-64BIT-NEXT: srad 7, 3, 8 +; LE-64BIT-NEXT: isellt 6, 6, 7 +; LE-64BIT-NEXT: srad 3, 3, 4 +; LE-64BIT-NEXT: std 3, 8(5) +; LE-64BIT-NEXT: std 6, 0(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: ashr_16bytes: +; BE: # %bb.0: +; BE-NEXT: lwz 4, 12(4) +; BE-NEXT: ld 6, 8(3) +; BE-NEXT: ld 3, 0(3) +; BE-NEXT: subfic 7, 4, 64 +; BE-NEXT: srd 6, 6, 4 +; BE-NEXT: addi 8, 4, -64 +; BE-NEXT: sld 7, 3, 7 +; BE-NEXT: cmpwi 8, 1 +; BE-NEXT: or 6, 6, 7 +; BE-NEXT: srad 7, 3, 8 +; BE-NEXT: srad 3, 3, 4 +; BE-NEXT: bc 12, 0, .LBB8_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: ori 6, 7, 0 +; BE-NEXT: b .LBB8_2 +; BE-NEXT: .LBB8_2: +; BE-NEXT: std 3, 0(5) +; BE-NEXT: std 6, 8(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: ashr_16bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: lwz 4, 12(4) +; LE-32BIT-NEXT: lwz 6, 8(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: subfic 9, 4, 96 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: addi 10, 4, -64 +; LE-32BIT-NEXT: lwz 3, 0(3) +; LE-32BIT-NEXT: subfic 0, 4, 32 +; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 27, 8, 10 +; LE-32BIT-NEXT: slw 9, 3, 9 +; LE-32BIT-NEXT: srw 12, 7, 4 +; LE-32BIT-NEXT: or 9, 27, 9 +; LE-32BIT-NEXT: slw 27, 6, 0 +; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 29, 8, 4 +; LE-32BIT-NEXT: or 12, 12, 27 +; LE-32BIT-NEXT: slw 27, 3, 0 +; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: cmplwi 4, 64 +; LE-32BIT-NEXT: srawi 28, 3, 31 +; LE-32BIT-NEXT: or 29, 29, 27 +; LE-32BIT-NEXT: sraw 27, 3, 4 +; LE-32BIT-NEXT: addi 11, 4, -96 +; LE-32BIT-NEXT: bc 12, 0, .LBB8_2 +; LE-32BIT-NEXT: # %bb.1: +; LE-32BIT-NEXT: ori 27, 28, 0 +; LE-32BIT-NEXT: b .LBB8_2 +; LE-32BIT-NEXT: .LBB8_2: +; LE-32BIT-NEXT: cmpwi 1, 11, 1 +; LE-32BIT-NEXT: sraw 11, 3, 11 +; LE-32BIT-NEXT: stw 27, 0(5) +; LE-32BIT-NEXT: subfic 27, 4, 64 +; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: addi 30, 4, -32 +; LE-32BIT-NEXT: bc 12, 4, .LBB8_4 +; LE-32BIT-NEXT: # %bb.3: +; LE-32BIT-NEXT: ori 9, 11, 0 +; LE-32BIT-NEXT: b .LBB8_4 +; LE-32BIT-NEXT: .LBB8_4: +; LE-32BIT-NEXT: subfic 11, 27, 32 +; LE-32BIT-NEXT: slw 0, 8, 0 +; LE-32BIT-NEXT: srw 11, 8, 11 +; LE-32BIT-NEXT: slw 8, 8, 27 +; LE-32BIT-NEXT: slw 27, 3, 27 +; LE-32BIT-NEXT: sraw 10, 3, 10 +; LE-32BIT-NEXT: sraw 3, 3, 30 +; LE-32BIT-NEXT: cmpwi 1, 30, 1 +; LE-32BIT-NEXT: srw 30, 6, 30 +; LE-32BIT-NEXT: or 12, 12, 30 +; LE-32BIT-NEXT: or 11, 27, 11 +; LE-32BIT-NEXT: bc 12, 4, .LBB8_5 +; LE-32BIT-NEXT: b .LBB8_6 +; LE-32BIT-NEXT: .LBB8_5: +; LE-32BIT-NEXT: addi 3, 29, 0 +; LE-32BIT-NEXT: .LBB8_6: +; LE-32BIT-NEXT: cmplwi 1, 4, 0 +; LE-32BIT-NEXT: srw 4, 6, 4 +; LE-32BIT-NEXT: or 8, 12, 8 +; LE-32BIT-NEXT: or 11, 11, 0 +; LE-32BIT-NEXT: bc 12, 0, .LBB8_8 +; LE-32BIT-NEXT: # %bb.7: +; LE-32BIT-NEXT: ori 3, 28, 0 +; LE-32BIT-NEXT: ori 8, 9, 0 +; LE-32BIT-NEXT: b .LBB8_8 +; LE-32BIT-NEXT: .LBB8_8: +; LE-32BIT-NEXT: or 4, 4, 11 +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: bc 12, 6, .LBB8_10 +; LE-32BIT-NEXT: # %bb.9: +; LE-32BIT-NEXT: ori 3, 8, 0 +; LE-32BIT-NEXT: b .LBB8_11 +; LE-32BIT-NEXT: .LBB8_10: +; LE-32BIT-NEXT: addi 3, 7, 0 +; LE-32BIT-NEXT: .LBB8_11: +; LE-32BIT-NEXT: bc 12, 0, .LBB8_13 +; LE-32BIT-NEXT: # %bb.12: +; LE-32BIT-NEXT: ori 4, 10, 0 +; LE-32BIT-NEXT: b .LBB8_13 +; LE-32BIT-NEXT: .LBB8_13: +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: bc 12, 6, .LBB8_15 +; LE-32BIT-NEXT: # %bb.14: +; LE-32BIT-NEXT: ori 3, 4, 0 +; LE-32BIT-NEXT: b .LBB8_16 +; LE-32BIT-NEXT: .LBB8_15: +; LE-32BIT-NEXT: addi 3, 6, 0 +; LE-32BIT-NEXT: .LBB8_16: +; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: blr + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = ashr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: lshr_32bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: ld 7, 0(3) +; LE-64BIT-NEXT: ld 8, 8(3) +; LE-64BIT-NEXT: ld 9, 16(3) +; LE-64BIT-NEXT: li 6, 0 +; LE-64BIT-NEXT: ld 3, 24(3) +; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: subfic 28, 4, 64 +; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: subfic 11, 4, 192 +; LE-64BIT-NEXT: addi 0, 4, -128 +; LE-64BIT-NEXT: subfic 25, 4, 128 +; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: srd 29, 9, 4 +; LE-64BIT-NEXT: addi 27, 4, -64 +; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: sld 24, 8, 28 +; LE-64BIT-NEXT: sld 21, 9, 28 +; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: sld 28, 3, 28 +; LE-64BIT-NEXT: srd 10, 7, 4 +; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: addi 30, 4, -192 +; LE-64BIT-NEXT: sld 11, 3, 11 +; LE-64BIT-NEXT: subfic 22, 25, 64 +; LE-64BIT-NEXT: or 29, 29, 28 +; LE-64BIT-NEXT: srd 26, 9, 0 +; LE-64BIT-NEXT: srd 28, 3, 27 +; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: or 10, 10, 24 +; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: srd 30, 3, 30 +; LE-64BIT-NEXT: srd 23, 8, 27 +; LE-64BIT-NEXT: or 11, 26, 11 +; LE-64BIT-NEXT: or 29, 29, 28 +; LE-64BIT-NEXT: srd 27, 9, 22 +; LE-64BIT-NEXT: sld 28, 3, 25 +; LE-64BIT-NEXT: or 10, 10, 23 +; LE-64BIT-NEXT: or 11, 11, 30 +; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: sld 9, 9, 25 +; LE-64BIT-NEXT: or 30, 28, 27 +; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: cmplwi 4, 128 +; LE-64BIT-NEXT: srd 12, 8, 4 +; LE-64BIT-NEXT: or 9, 10, 9 +; LE-64BIT-NEXT: or 30, 30, 21 +; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: cmplwi 1, 4, 0 +; LE-64BIT-NEXT: srd 10, 3, 0 +; LE-64BIT-NEXT: isellt 9, 9, 11 +; LE-64BIT-NEXT: or 11, 12, 30 +; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: isel 7, 7, 9, 6 +; LE-64BIT-NEXT: srd 3, 3, 4 +; LE-64BIT-NEXT: isellt 9, 11, 10 +; LE-64BIT-NEXT: std 7, 0(5) +; LE-64BIT-NEXT: isellt 0, 29, 6 +; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: isel 4, 8, 9, 6 +; LE-64BIT-NEXT: std 0, 16(5) +; LE-64BIT-NEXT: isellt 3, 3, 6 +; LE-64BIT-NEXT: std 4, 8(5) +; LE-64BIT-NEXT: std 3, 24(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: lshr_32bytes: +; BE: # %bb.0: +; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: ld 7, 24(3) +; BE-NEXT: ld 8, 16(3) +; BE-NEXT: ld 9, 8(3) +; BE-NEXT: ld 3, 0(3) +; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill +; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; BE-NEXT: li 6, 0 +; BE-NEXT: subfic 10, 4, 192 +; BE-NEXT: addi 11, 4, -128 +; BE-NEXT: addi 12, 4, -192 +; BE-NEXT: subfic 30, 4, 64 +; BE-NEXT: sld 10, 3, 10 +; BE-NEXT: srd 27, 9, 11 +; BE-NEXT: srd 0, 7, 4 +; BE-NEXT: addi 29, 4, -64 +; BE-NEXT: subfic 28, 4, 128 +; BE-NEXT: srd 12, 3, 12 +; BE-NEXT: or 10, 27, 10 +; BE-NEXT: sld 27, 8, 30 +; BE-NEXT: or 10, 10, 12 +; BE-NEXT: or 0, 0, 27 +; BE-NEXT: srd 27, 8, 29 +; BE-NEXT: subfic 12, 28, 64 +; BE-NEXT: or 0, 0, 27 +; BE-NEXT: sld 27, 3, 28 +; BE-NEXT: srd 12, 9, 12 +; BE-NEXT: sld 28, 9, 28 +; BE-NEXT: cmplwi 4, 128 +; BE-NEXT: or 12, 27, 12 +; BE-NEXT: or 28, 0, 28 +; BE-NEXT: sld 0, 9, 30 +; BE-NEXT: srd 9, 9, 4 +; BE-NEXT: srd 11, 3, 11 +; BE-NEXT: cmplwi 1, 4, 0 +; BE-NEXT: or 12, 12, 0 +; BE-NEXT: srd 0, 8, 4 +; BE-NEXT: bc 12, 0, .LBB9_1 +; BE-NEXT: b .LBB9_2 +; BE-NEXT: .LBB9_1: +; BE-NEXT: addi 10, 28, 0 +; BE-NEXT: .LBB9_2: +; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; BE-NEXT: or 12, 0, 12 +; BE-NEXT: sld 0, 3, 30 +; BE-NEXT: srd 30, 3, 29 +; BE-NEXT: bc 12, 0, .LBB9_3 +; BE-NEXT: b .LBB9_4 +; BE-NEXT: .LBB9_3: +; BE-NEXT: addi 11, 12, 0 +; BE-NEXT: .LBB9_4: +; BE-NEXT: srd 3, 3, 4 +; BE-NEXT: bc 12, 6, .LBB9_6 +; BE-NEXT: # %bb.5: +; BE-NEXT: ori 4, 10, 0 +; BE-NEXT: b .LBB9_7 +; BE-NEXT: .LBB9_6: +; BE-NEXT: addi 4, 7, 0 +; BE-NEXT: .LBB9_7: +; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; BE-NEXT: or 9, 9, 0 +; BE-NEXT: or 9, 9, 30 +; BE-NEXT: bc 12, 6, .LBB9_9 +; BE-NEXT: # %bb.8: +; BE-NEXT: ori 7, 11, 0 +; BE-NEXT: b .LBB9_10 +; BE-NEXT: .LBB9_9: +; BE-NEXT: addi 7, 8, 0 +; BE-NEXT: .LBB9_10: +; BE-NEXT: bc 12, 0, .LBB9_12 +; BE-NEXT: # %bb.11: +; BE-NEXT: ori 8, 6, 0 +; BE-NEXT: ori 3, 6, 0 +; BE-NEXT: b .LBB9_13 +; BE-NEXT: .LBB9_12: +; BE-NEXT: addi 8, 9, 0 +; BE-NEXT: .LBB9_13: +; BE-NEXT: std 4, 24(5) +; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; BE-NEXT: std 3, 0(5) +; BE-NEXT: std 8, 8(5) +; BE-NEXT: std 7, 16(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: lshr_32bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: stwu 1, -144(1) +; LE-32BIT-NEXT: mfcr 12 +; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 12, 68(1) +; LE-32BIT-NEXT: lwz 30, 28(4) +; LE-32BIT-NEXT: lwz 9, 28(3) +; LE-32BIT-NEXT: lwz 10, 4(3) +; LE-32BIT-NEXT: subfic 21, 30, 224 +; LE-32BIT-NEXT: lwz 11, 0(3) +; LE-32BIT-NEXT: subfic 4, 30, 160 +; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: addi 0, 30, -128 +; LE-32BIT-NEXT: lwz 5, 24(3) +; LE-32BIT-NEXT: subfic 28, 30, 96 +; LE-32BIT-NEXT: lwz 19, 20(3) +; LE-32BIT-NEXT: addi 29, 30, -64 +; LE-32BIT-NEXT: lwz 8, 16(3) +; LE-32BIT-NEXT: srw 20, 9, 30 +; LE-32BIT-NEXT: lwz 12, 12(3) +; LE-32BIT-NEXT: slw 21, 11, 21 +; LE-32BIT-NEXT: lwz 6, 8(3) +; LE-32BIT-NEXT: addi 3, 30, -192 +; LE-32BIT-NEXT: stw 9, 60(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: subfic 9, 30, 32 +; LE-32BIT-NEXT: srw 16, 10, 3 +; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 15, 6, 4 +; LE-32BIT-NEXT: srw 14, 12, 0 +; LE-32BIT-NEXT: slw 31, 8, 28 +; LE-32BIT-NEXT: srw 3, 19, 29 +; LE-32BIT-NEXT: or 21, 16, 21 +; LE-32BIT-NEXT: slw 16, 5, 9 +; LE-32BIT-NEXT: srw 25, 19, 30 +; LE-32BIT-NEXT: or 15, 14, 15 +; LE-32BIT-NEXT: slw 14, 8, 9 +; LE-32BIT-NEXT: or 3, 3, 31 +; LE-32BIT-NEXT: slw 31, 11, 4 +; LE-32BIT-NEXT: or 20, 20, 16 +; LE-32BIT-NEXT: srw 16, 10, 0 +; LE-32BIT-NEXT: addi 26, 30, -224 +; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 4, 25, 14 +; LE-32BIT-NEXT: slw 14, 11, 28 +; LE-32BIT-NEXT: or 16, 16, 31 +; LE-32BIT-NEXT: srw 31, 10, 29 +; LE-32BIT-NEXT: addi 23, 30, -160 +; LE-32BIT-NEXT: srw 18, 12, 30 +; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 14, 31, 14 +; LE-32BIT-NEXT: stw 29, 52(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: mr 29, 6 +; LE-32BIT-NEXT: slw 31, 6, 9 +; LE-32BIT-NEXT: srw 0, 11, 26 +; LE-32BIT-NEXT: addi 24, 30, -96 +; LE-32BIT-NEXT: srw 17, 10, 30 +; LE-32BIT-NEXT: or 18, 18, 31 +; LE-32BIT-NEXT: slw 31, 11, 9 +; LE-32BIT-NEXT: or 6, 21, 0 +; LE-32BIT-NEXT: srw 0, 29, 23 +; LE-32BIT-NEXT: or 17, 17, 31 +; LE-32BIT-NEXT: addi 31, 30, -32 +; LE-32BIT-NEXT: or 0, 15, 0 +; LE-32BIT-NEXT: srw 15, 8, 24 +; LE-32BIT-NEXT: or 3, 3, 15 +; LE-32BIT-NEXT: srw 15, 5, 31 +; LE-32BIT-NEXT: or 20, 20, 15 +; LE-32BIT-NEXT: srw 15, 8, 31 +; LE-32BIT-NEXT: stw 3, 28(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 3, 4, 15 +; LE-32BIT-NEXT: srw 23, 11, 23 +; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: subfic 15, 30, 64 +; LE-32BIT-NEXT: or 3, 16, 23 +; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: subfic 3, 15, 32 +; LE-32BIT-NEXT: slw 16, 29, 15 +; LE-32BIT-NEXT: srw 22, 12, 3 +; LE-32BIT-NEXT: or 21, 16, 22 +; LE-32BIT-NEXT: subfic 16, 30, 128 +; LE-32BIT-NEXT: mr 7, 10 +; LE-32BIT-NEXT: mr 10, 5 +; LE-32BIT-NEXT: subfic 5, 16, 32 +; LE-32BIT-NEXT: stw 6, 32(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: mr 6, 24 +; LE-32BIT-NEXT: slw 4, 11, 16 +; LE-32BIT-NEXT: srw 24, 7, 5 +; LE-32BIT-NEXT: or 22, 4, 24 +; LE-32BIT-NEXT: slw 24, 29, 16 +; LE-32BIT-NEXT: srw 27, 12, 5 +; LE-32BIT-NEXT: or 27, 24, 27 +; LE-32BIT-NEXT: slw 24, 8, 15 +; LE-32BIT-NEXT: srw 26, 19, 3 +; LE-32BIT-NEXT: or 26, 24, 26 +; LE-32BIT-NEXT: subfic 24, 30, 192 +; LE-32BIT-NEXT: mr 25, 28 +; LE-32BIT-NEXT: subfic 28, 24, 32 +; LE-32BIT-NEXT: mr 23, 19 +; LE-32BIT-NEXT: srw 28, 7, 28 +; LE-32BIT-NEXT: slw 19, 11, 24 +; LE-32BIT-NEXT: mr 4, 29 +; LE-32BIT-NEXT: or 28, 19, 28 +; LE-32BIT-NEXT: srw 19, 11, 6 +; LE-32BIT-NEXT: or 19, 14, 19 +; LE-32BIT-NEXT: srw 14, 4, 31 +; LE-32BIT-NEXT: or 6, 18, 14 +; LE-32BIT-NEXT: lwz 18, 64(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 3, 7, 3 +; LE-32BIT-NEXT: slw 14, 11, 15 +; LE-32BIT-NEXT: cmplwi 1, 30, 64 +; LE-32BIT-NEXT: cmplwi 30, 128 +; LE-32BIT-NEXT: slw 24, 7, 24 +; LE-32BIT-NEXT: mr 29, 12 +; LE-32BIT-NEXT: or 12, 14, 3 +; LE-32BIT-NEXT: srw 14, 11, 31 +; LE-32BIT-NEXT: crnand 28, 0, 4 +; LE-32BIT-NEXT: srw 31, 11, 30 +; LE-32BIT-NEXT: or 24, 0, 24 +; LE-32BIT-NEXT: slw 0, 23, 15 +; LE-32BIT-NEXT: or 17, 17, 14 +; LE-32BIT-NEXT: bc 12, 28, .LBB9_2 +; LE-32BIT-NEXT: # %bb.1: +; LE-32BIT-NEXT: ori 14, 31, 0 +; LE-32BIT-NEXT: b .LBB9_3 +; LE-32BIT-NEXT: .LBB9_2: +; LE-32BIT-NEXT: li 14, 0 +; LE-32BIT-NEXT: .LBB9_3: +; LE-32BIT-NEXT: or 20, 20, 0 +; LE-32BIT-NEXT: subfic 0, 16, 64 +; LE-32BIT-NEXT: stw 14, 0(18) +; LE-32BIT-NEXT: subfic 14, 0, 32 +; LE-32BIT-NEXT: slw 14, 4, 14 +; LE-32BIT-NEXT: srw 31, 29, 0 +; LE-32BIT-NEXT: or 14, 31, 14 +; LE-32BIT-NEXT: slw 31, 29, 9 +; LE-32BIT-NEXT: mr 3, 29 +; LE-32BIT-NEXT: or 29, 21, 31 +; LE-32BIT-NEXT: slw 31, 7, 25 +; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 29, 22, 31 +; LE-32BIT-NEXT: slw 31, 3, 25 +; LE-32BIT-NEXT: or 27, 27, 31 +; LE-32BIT-NEXT: stw 27, 24(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 31, 23, 9 +; LE-32BIT-NEXT: lwz 27, 36(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 26, 26, 31 +; LE-32BIT-NEXT: slw 25, 7, 9 +; LE-32BIT-NEXT: or 12, 12, 25 +; LE-32BIT-NEXT: slw 31, 7, 27 +; LE-32BIT-NEXT: or 28, 28, 31 +; LE-32BIT-NEXT: slw 31, 7, 15 +; LE-32BIT-NEXT: or 22, 6, 31 +; LE-32BIT-NEXT: lwz 31, 40(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 0, 4, 0 +; LE-32BIT-NEXT: lwz 6, 32(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 27, 29, 0 +; LE-32BIT-NEXT: cmplwi 6, 31, 64 +; LE-32BIT-NEXT: srw 0, 10, 30 +; LE-32BIT-NEXT: bc 12, 24, .LBB9_5 +; LE-32BIT-NEXT: # %bb.4: +; LE-32BIT-NEXT: ori 25, 6, 0 +; LE-32BIT-NEXT: b .LBB9_6 +; LE-32BIT-NEXT: .LBB9_5: +; LE-32BIT-NEXT: addi 25, 24, 0 +; LE-32BIT-NEXT: .LBB9_6: +; LE-32BIT-NEXT: lwz 6, 28(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 26, 0, 26 +; LE-32BIT-NEXT: srw 0, 4, 31 +; LE-32BIT-NEXT: or 28, 0, 28 +; LE-32BIT-NEXT: srw 0, 4, 30 +; LE-32BIT-NEXT: bc 12, 4, .LBB9_8 +; LE-32BIT-NEXT: # %bb.7: +; LE-32BIT-NEXT: ori 9, 6, 0 +; LE-32BIT-NEXT: b .LBB9_9 +; LE-32BIT-NEXT: .LBB9_8: +; LE-32BIT-NEXT: addi 9, 20, 0 +; LE-32BIT-NEXT: .LBB9_9: +; LE-32BIT-NEXT: or 6, 0, 12 +; LE-32BIT-NEXT: lwz 12, 52(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 5, 4, 5 +; LE-32BIT-NEXT: bc 12, 28, .LBB9_11 +; LE-32BIT-NEXT: # %bb.10: +; LE-32BIT-NEXT: ori 0, 17, 0 +; LE-32BIT-NEXT: b .LBB9_12 +; LE-32BIT-NEXT: .LBB9_11: +; LE-32BIT-NEXT: li 0, 0 +; LE-32BIT-NEXT: .LBB9_12: +; LE-32BIT-NEXT: or 5, 14, 5 +; LE-32BIT-NEXT: stw 0, 4(18) +; LE-32BIT-NEXT: slw 21, 3, 16 +; LE-32BIT-NEXT: cmplwi 7, 16, 64 +; LE-32BIT-NEXT: cmplwi 3, 16, 0 +; LE-32BIT-NEXT: slw 0, 7, 16 +; LE-32BIT-NEXT: li 16, 0 +; LE-32BIT-NEXT: bc 12, 4, .LBB9_14 +; LE-32BIT-NEXT: # %bb.13: +; LE-32BIT-NEXT: ori 24, 19, 0 +; LE-32BIT-NEXT: b .LBB9_15 +; LE-32BIT-NEXT: .LBB9_14: +; LE-32BIT-NEXT: addi 24, 22, 0 +; LE-32BIT-NEXT: .LBB9_15: +; LE-32BIT-NEXT: cmplwi 5, 30, 0 +; LE-32BIT-NEXT: cmplwi 2, 31, 0 +; LE-32BIT-NEXT: or 5, 0, 5 +; LE-32BIT-NEXT: srw 17, 11, 12 +; LE-32BIT-NEXT: bc 12, 28, .LBB9_17 +; LE-32BIT-NEXT: # %bb.16: +; LE-32BIT-NEXT: ori 0, 16, 0 +; LE-32BIT-NEXT: b .LBB9_18 +; LE-32BIT-NEXT: .LBB9_17: +; LE-32BIT-NEXT: addi 0, 21, 0 +; LE-32BIT-NEXT: .LBB9_18: +; LE-32BIT-NEXT: lwz 21, 60(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: slw 20, 3, 15 +; LE-32BIT-NEXT: srw 19, 8, 12 +; LE-32BIT-NEXT: bc 12, 10, .LBB9_19 +; LE-32BIT-NEXT: b .LBB9_20 +; LE-32BIT-NEXT: .LBB9_19: +; LE-32BIT-NEXT: addi 25, 3, 0 +; LE-32BIT-NEXT: .LBB9_20: +; LE-32BIT-NEXT: bc 12, 22, .LBB9_22 +; LE-32BIT-NEXT: # %bb.21: +; LE-32BIT-NEXT: ori 12, 24, 0 +; LE-32BIT-NEXT: b .LBB9_23 +; LE-32BIT-NEXT: .LBB9_22: +; LE-32BIT-NEXT: addi 12, 3, 0 +; LE-32BIT-NEXT: .LBB9_23: +; LE-32BIT-NEXT: bc 12, 4, .LBB9_25 +; LE-32BIT-NEXT: # %bb.24: +; LE-32BIT-NEXT: ori 3, 17, 0 +; LE-32BIT-NEXT: b .LBB9_26 +; LE-32BIT-NEXT: .LBB9_25: +; LE-32BIT-NEXT: addi 3, 6, 0 +; LE-32BIT-NEXT: .LBB9_26: +; LE-32BIT-NEXT: lwz 6, 48(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 30, 8, 30 +; LE-32BIT-NEXT: srw 29, 11, 31 +; LE-32BIT-NEXT: bc 12, 22, .LBB9_27 +; LE-32BIT-NEXT: b .LBB9_28 +; LE-32BIT-NEXT: .LBB9_27: +; LE-32BIT-NEXT: addi 9, 21, 0 +; LE-32BIT-NEXT: .LBB9_28: +; LE-32BIT-NEXT: mr 22, 4 +; LE-32BIT-NEXT: lwz 4, 56(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 9, 9, 0 +; LE-32BIT-NEXT: bc 12, 4, .LBB9_30 +; LE-32BIT-NEXT: # %bb.29: +; LE-32BIT-NEXT: ori 0, 16, 0 +; LE-32BIT-NEXT: b .LBB9_31 +; LE-32BIT-NEXT: .LBB9_30: +; LE-32BIT-NEXT: addi 0, 30, 0 +; LE-32BIT-NEXT: .LBB9_31: +; LE-32BIT-NEXT: bc 12, 24, .LBB9_33 +; LE-32BIT-NEXT: # %bb.32: +; LE-32BIT-NEXT: ori 30, 16, 0 +; LE-32BIT-NEXT: b .LBB9_34 +; LE-32BIT-NEXT: .LBB9_33: +; LE-32BIT-NEXT: addi 30, 29, 0 +; LE-32BIT-NEXT: .LBB9_34: +; LE-32BIT-NEXT: bc 12, 4, .LBB9_36 +; LE-32BIT-NEXT: # %bb.35: +; LE-32BIT-NEXT: ori 29, 16, 0 +; LE-32BIT-NEXT: b .LBB9_37 +; LE-32BIT-NEXT: .LBB9_36: +; LE-32BIT-NEXT: addi 29, 6, 0 +; LE-32BIT-NEXT: .LBB9_37: +; LE-32BIT-NEXT: lwz 6, 44(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: mr 14, 18 +; LE-32BIT-NEXT: srw 18, 11, 4 +; LE-32BIT-NEXT: lwz 4, 20(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: bc 12, 24, .LBB9_39 +; LE-32BIT-NEXT: # %bb.38: +; LE-32BIT-NEXT: ori 24, 16, 0 +; LE-32BIT-NEXT: b .LBB9_40 +; LE-32BIT-NEXT: .LBB9_39: +; LE-32BIT-NEXT: addi 24, 6, 0 +; LE-32BIT-NEXT: .LBB9_40: +; LE-32BIT-NEXT: bc 12, 4, .LBB9_42 +; LE-32BIT-NEXT: # %bb.41: +; LE-32BIT-NEXT: ori 26, 19, 0 +; LE-32BIT-NEXT: b .LBB9_42 +; LE-32BIT-NEXT: .LBB9_42: +; LE-32BIT-NEXT: bc 12, 22, .LBB9_43 +; LE-32BIT-NEXT: b .LBB9_44 +; LE-32BIT-NEXT: .LBB9_43: +; LE-32BIT-NEXT: addi 3, 22, 0 +; LE-32BIT-NEXT: .LBB9_44: +; LE-32BIT-NEXT: bc 12, 28, .LBB9_46 +; LE-32BIT-NEXT: # %bb.45: +; LE-32BIT-NEXT: ori 5, 20, 0 +; LE-32BIT-NEXT: b .LBB9_46 +; LE-32BIT-NEXT: .LBB9_46: +; LE-32BIT-NEXT: bc 12, 0, .LBB9_48 +; LE-32BIT-NEXT: # %bb.47: +; LE-32BIT-NEXT: ori 9, 25, 0 +; LE-32BIT-NEXT: b .LBB9_48 +; LE-32BIT-NEXT: .LBB9_48: +; LE-32BIT-NEXT: bc 12, 24, .LBB9_50 +; LE-32BIT-NEXT: # %bb.49: +; LE-32BIT-NEXT: ori 28, 18, 0 +; LE-32BIT-NEXT: b .LBB9_50 +; LE-32BIT-NEXT: .LBB9_50: +; LE-32BIT-NEXT: bc 12, 0, .LBB9_52 +; LE-32BIT-NEXT: # %bb.51: +; LE-32BIT-NEXT: ori 12, 16, 0 +; LE-32BIT-NEXT: b .LBB9_52 +; LE-32BIT-NEXT: .LBB9_52: +; LE-32BIT-NEXT: lwz 6, 24(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: bc 12, 14, .LBB9_53 +; LE-32BIT-NEXT: b .LBB9_54 +; LE-32BIT-NEXT: .LBB9_53: +; LE-32BIT-NEXT: addi 5, 7, 0 +; LE-32BIT-NEXT: .LBB9_54: +; LE-32BIT-NEXT: bc 12, 10, .LBB9_55 +; LE-32BIT-NEXT: b .LBB9_56 +; LE-32BIT-NEXT: .LBB9_55: +; LE-32BIT-NEXT: addi 28, 22, 0 +; LE-32BIT-NEXT: .LBB9_56: +; LE-32BIT-NEXT: bc 12, 28, .LBB9_57 +; LE-32BIT-NEXT: b .LBB9_58 +; LE-32BIT-NEXT: .LBB9_57: +; LE-32BIT-NEXT: addi 4, 27, 0 +; LE-32BIT-NEXT: .LBB9_58: +; LE-32BIT-NEXT: stw 12, 12(14) +; LE-32BIT-NEXT: bc 12, 14, .LBB9_59 +; LE-32BIT-NEXT: b .LBB9_60 +; LE-32BIT-NEXT: .LBB9_59: +; LE-32BIT-NEXT: addi 4, 11, 0 +; LE-32BIT-NEXT: .LBB9_60: +; LE-32BIT-NEXT: bc 12, 28, .LBB9_62 +; LE-32BIT-NEXT: # %bb.61: +; LE-32BIT-NEXT: ori 27, 16, 0 +; LE-32BIT-NEXT: b .LBB9_63 +; LE-32BIT-NEXT: .LBB9_62: +; LE-32BIT-NEXT: addi 27, 6, 0 +; LE-32BIT-NEXT: .LBB9_63: +; LE-32BIT-NEXT: bc 12, 22, .LBB9_65 +; LE-32BIT-NEXT: # %bb.64: +; LE-32BIT-NEXT: ori 6, 26, 0 +; LE-32BIT-NEXT: b .LBB9_66 +; LE-32BIT-NEXT: .LBB9_65: +; LE-32BIT-NEXT: addi 6, 10, 0 +; LE-32BIT-NEXT: .LBB9_66: +; LE-32BIT-NEXT: li 26, 0 +; LE-32BIT-NEXT: bc 12, 0, .LBB9_68 +; LE-32BIT-NEXT: # %bb.67: +; LE-32BIT-NEXT: ori 3, 26, 0 +; LE-32BIT-NEXT: b .LBB9_68 +; LE-32BIT-NEXT: .LBB9_68: +; LE-32BIT-NEXT: or 6, 6, 27 +; LE-32BIT-NEXT: stw 3, 8(14) +; LE-32BIT-NEXT: or 3, 0, 4 +; LE-32BIT-NEXT: bc 12, 22, .LBB9_70 +; LE-32BIT-NEXT: # %bb.69: +; LE-32BIT-NEXT: ori 4, 9, 0 +; LE-32BIT-NEXT: b .LBB9_71 +; LE-32BIT-NEXT: .LBB9_70: +; LE-32BIT-NEXT: addi 4, 21, 0 +; LE-32BIT-NEXT: .LBB9_71: +; LE-32BIT-NEXT: bc 12, 0, .LBB9_73 +; LE-32BIT-NEXT: # %bb.72: +; LE-32BIT-NEXT: ori 3, 30, 0 +; LE-32BIT-NEXT: ori 6, 28, 0 +; LE-32BIT-NEXT: b .LBB9_73 +; LE-32BIT-NEXT: .LBB9_73: +; LE-32BIT-NEXT: stw 4, 28(14) +; LE-32BIT-NEXT: or 4, 29, 5 +; LE-32BIT-NEXT: bc 12, 0, .LBB9_75 +; LE-32BIT-NEXT: # %bb.74: +; LE-32BIT-NEXT: ori 4, 24, 0 +; LE-32BIT-NEXT: b .LBB9_75 +; LE-32BIT-NEXT: .LBB9_75: +; LE-32BIT-NEXT: bc 12, 22, .LBB9_77 +; LE-32BIT-NEXT: # %bb.76: +; LE-32BIT-NEXT: ori 5, 6, 0 +; LE-32BIT-NEXT: b .LBB9_78 +; LE-32BIT-NEXT: .LBB9_77: +; LE-32BIT-NEXT: addi 3, 8, 0 +; LE-32BIT-NEXT: addi 5, 10, 0 +; LE-32BIT-NEXT: .LBB9_78: +; LE-32BIT-NEXT: stw 3, 16(14) +; LE-32BIT-NEXT: bc 12, 22, .LBB9_80 +; LE-32BIT-NEXT: # %bb.79: +; LE-32BIT-NEXT: ori 3, 4, 0 +; LE-32BIT-NEXT: b .LBB9_81 +; LE-32BIT-NEXT: .LBB9_80: +; LE-32BIT-NEXT: addi 3, 23, 0 +; LE-32BIT-NEXT: .LBB9_81: +; LE-32BIT-NEXT: stw 5, 24(14) +; LE-32BIT-NEXT: stw 3, 20(14) +; LE-32BIT-NEXT: lwz 12, 68(1) +; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 +; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 +; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: blr + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = lshr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: shl_32bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: ld 7, 24(3) +; LE-64BIT-NEXT: ld 8, 16(3) +; LE-64BIT-NEXT: ld 9, 8(3) +; LE-64BIT-NEXT: li 6, 0 +; LE-64BIT-NEXT: ld 3, 0(3) +; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: subfic 28, 4, 64 +; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: subfic 11, 4, 192 +; LE-64BIT-NEXT: addi 0, 4, -128 +; LE-64BIT-NEXT: subfic 25, 4, 128 +; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: sld 29, 9, 4 +; LE-64BIT-NEXT: addi 27, 4, -64 +; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: srd 24, 8, 28 +; LE-64BIT-NEXT: srd 21, 9, 28 +; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: srd 28, 3, 28 +; LE-64BIT-NEXT: sld 10, 7, 4 +; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: addi 30, 4, -192 +; LE-64BIT-NEXT: srd 11, 3, 11 +; LE-64BIT-NEXT: subfic 22, 25, 64 +; LE-64BIT-NEXT: or 29, 29, 28 +; LE-64BIT-NEXT: sld 26, 9, 0 +; LE-64BIT-NEXT: sld 28, 3, 27 +; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: or 10, 10, 24 +; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: sld 30, 3, 30 +; LE-64BIT-NEXT: sld 23, 8, 27 +; LE-64BIT-NEXT: or 11, 26, 11 +; LE-64BIT-NEXT: or 29, 29, 28 +; LE-64BIT-NEXT: sld 27, 9, 22 +; LE-64BIT-NEXT: srd 28, 3, 25 +; LE-64BIT-NEXT: or 10, 10, 23 +; LE-64BIT-NEXT: or 11, 11, 30 +; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: srd 9, 9, 25 +; LE-64BIT-NEXT: or 30, 28, 27 +; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: cmplwi 4, 128 +; LE-64BIT-NEXT: sld 12, 8, 4 +; LE-64BIT-NEXT: or 9, 10, 9 +; LE-64BIT-NEXT: or 30, 30, 21 +; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: cmplwi 1, 4, 0 +; LE-64BIT-NEXT: sld 10, 3, 0 +; LE-64BIT-NEXT: isellt 9, 9, 11 +; LE-64BIT-NEXT: or 11, 12, 30 +; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: isel 7, 7, 9, 6 +; LE-64BIT-NEXT: sld 3, 3, 4 +; LE-64BIT-NEXT: isellt 9, 11, 10 +; LE-64BIT-NEXT: std 7, 24(5) +; LE-64BIT-NEXT: isellt 0, 29, 6 +; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: isel 4, 8, 9, 6 +; LE-64BIT-NEXT: std 0, 8(5) +; LE-64BIT-NEXT: isellt 3, 3, 6 +; LE-64BIT-NEXT: std 4, 16(5) +; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: shl_32bytes: +; BE: # %bb.0: +; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: ld 7, 0(3) +; BE-NEXT: ld 8, 8(3) +; BE-NEXT: ld 9, 16(3) +; BE-NEXT: ld 3, 24(3) +; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill +; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; BE-NEXT: li 6, 0 +; BE-NEXT: subfic 10, 4, 192 +; BE-NEXT: addi 11, 4, -128 +; BE-NEXT: addi 12, 4, -192 +; BE-NEXT: subfic 30, 4, 64 +; BE-NEXT: srd 10, 3, 10 +; BE-NEXT: sld 27, 9, 11 +; BE-NEXT: sld 0, 7, 4 +; BE-NEXT: addi 29, 4, -64 +; BE-NEXT: subfic 28, 4, 128 +; BE-NEXT: sld 12, 3, 12 +; BE-NEXT: or 10, 27, 10 +; BE-NEXT: srd 27, 8, 30 +; BE-NEXT: or 10, 10, 12 +; BE-NEXT: or 0, 0, 27 +; BE-NEXT: sld 27, 8, 29 +; BE-NEXT: subfic 12, 28, 64 +; BE-NEXT: or 0, 0, 27 +; BE-NEXT: srd 27, 3, 28 +; BE-NEXT: sld 12, 9, 12 +; BE-NEXT: srd 28, 9, 28 +; BE-NEXT: cmplwi 4, 128 +; BE-NEXT: or 12, 27, 12 +; BE-NEXT: or 28, 0, 28 +; BE-NEXT: srd 0, 9, 30 +; BE-NEXT: sld 9, 9, 4 +; BE-NEXT: sld 11, 3, 11 +; BE-NEXT: cmplwi 1, 4, 0 +; BE-NEXT: or 12, 12, 0 +; BE-NEXT: sld 0, 8, 4 +; BE-NEXT: bc 12, 0, .LBB10_1 +; BE-NEXT: b .LBB10_2 +; BE-NEXT: .LBB10_1: +; BE-NEXT: addi 10, 28, 0 +; BE-NEXT: .LBB10_2: +; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; BE-NEXT: or 12, 0, 12 +; BE-NEXT: srd 0, 3, 30 +; BE-NEXT: sld 30, 3, 29 +; BE-NEXT: bc 12, 0, .LBB10_3 +; BE-NEXT: b .LBB10_4 +; BE-NEXT: .LBB10_3: +; BE-NEXT: addi 11, 12, 0 +; BE-NEXT: .LBB10_4: +; BE-NEXT: sld 3, 3, 4 +; BE-NEXT: bc 12, 6, .LBB10_6 +; BE-NEXT: # %bb.5: +; BE-NEXT: ori 4, 10, 0 +; BE-NEXT: b .LBB10_7 +; BE-NEXT: .LBB10_6: +; BE-NEXT: addi 4, 7, 0 +; BE-NEXT: .LBB10_7: +; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; BE-NEXT: or 9, 9, 0 +; BE-NEXT: or 9, 9, 30 +; BE-NEXT: bc 12, 6, .LBB10_9 +; BE-NEXT: # %bb.8: +; BE-NEXT: ori 7, 11, 0 +; BE-NEXT: b .LBB10_10 +; BE-NEXT: .LBB10_9: +; BE-NEXT: addi 7, 8, 0 +; BE-NEXT: .LBB10_10: +; BE-NEXT: bc 12, 0, .LBB10_12 +; BE-NEXT: # %bb.11: +; BE-NEXT: ori 8, 6, 0 +; BE-NEXT: ori 3, 6, 0 +; BE-NEXT: b .LBB10_13 +; BE-NEXT: .LBB10_12: +; BE-NEXT: addi 8, 9, 0 +; BE-NEXT: .LBB10_13: +; BE-NEXT: std 4, 0(5) +; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; BE-NEXT: std 3, 24(5) +; BE-NEXT: std 8, 16(5) +; BE-NEXT: std 7, 8(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: shl_32bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: stwu 1, -144(1) +; LE-32BIT-NEXT: mfcr 12 +; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 12, 68(1) +; LE-32BIT-NEXT: lwz 30, 28(4) +; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: lwz 6, 24(3) +; LE-32BIT-NEXT: subfic 21, 30, 224 +; LE-32BIT-NEXT: lwz 5, 28(3) +; LE-32BIT-NEXT: subfic 29, 30, 160 +; LE-32BIT-NEXT: lwz 7, 4(3) +; LE-32BIT-NEXT: addi 4, 30, -128 +; LE-32BIT-NEXT: lwz 9, 0(3) +; LE-32BIT-NEXT: subfic 28, 30, 96 +; LE-32BIT-NEXT: lwz 10, 8(3) +; LE-32BIT-NEXT: addi 0, 30, -64 +; LE-32BIT-NEXT: lwz 8, 12(3) +; LE-32BIT-NEXT: subfic 25, 30, 32 +; LE-32BIT-NEXT: lwz 12, 16(3) +; LE-32BIT-NEXT: srw 21, 5, 21 +; LE-32BIT-NEXT: lwz 11, 20(3) +; LE-32BIT-NEXT: addi 3, 30, -192 +; LE-32BIT-NEXT: slw 16, 6, 3 +; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 20, 9, 30 +; LE-32BIT-NEXT: srw 15, 11, 29 +; LE-32BIT-NEXT: slw 14, 12, 4 +; LE-32BIT-NEXT: srw 31, 8, 28 +; LE-32BIT-NEXT: slw 3, 10, 0 +; LE-32BIT-NEXT: or 21, 16, 21 +; LE-32BIT-NEXT: srw 16, 7, 25 +; LE-32BIT-NEXT: slw 19, 10, 30 +; LE-32BIT-NEXT: or 15, 14, 15 +; LE-32BIT-NEXT: srw 14, 8, 25 +; LE-32BIT-NEXT: or 3, 3, 31 +; LE-32BIT-NEXT: srw 31, 5, 29 +; LE-32BIT-NEXT: or 20, 20, 16 +; LE-32BIT-NEXT: slw 16, 6, 4 +; LE-32BIT-NEXT: addi 27, 30, -224 +; LE-32BIT-NEXT: or 19, 19, 14 +; LE-32BIT-NEXT: srw 14, 5, 28 +; LE-32BIT-NEXT: or 16, 16, 31 +; LE-32BIT-NEXT: slw 31, 6, 0 +; LE-32BIT-NEXT: addi 23, 30, -160 +; LE-32BIT-NEXT: slw 18, 12, 30 +; LE-32BIT-NEXT: stw 0, 52(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 14, 31, 14 +; LE-32BIT-NEXT: srw 31, 11, 25 +; LE-32BIT-NEXT: slw 0, 5, 27 +; LE-32BIT-NEXT: addi 26, 30, -96 +; LE-32BIT-NEXT: slw 17, 6, 30 +; LE-32BIT-NEXT: or 18, 18, 31 +; LE-32BIT-NEXT: srw 31, 5, 25 +; LE-32BIT-NEXT: or 21, 21, 0 +; LE-32BIT-NEXT: slw 0, 11, 23 +; LE-32BIT-NEXT: or 17, 17, 31 +; LE-32BIT-NEXT: addi 31, 30, -32 +; LE-32BIT-NEXT: or 0, 15, 0 +; LE-32BIT-NEXT: slw 15, 8, 26 +; LE-32BIT-NEXT: stw 29, 40(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 29, 3, 15 +; LE-32BIT-NEXT: slw 15, 7, 31 +; LE-32BIT-NEXT: or 20, 20, 15 +; LE-32BIT-NEXT: slw 15, 8, 31 +; LE-32BIT-NEXT: or 3, 19, 15 +; LE-32BIT-NEXT: subfic 15, 30, 128 +; LE-32BIT-NEXT: slw 23, 5, 23 +; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 3, 16, 23 +; LE-32BIT-NEXT: subfic 16, 15, 32 +; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 3, 11, 15 +; LE-32BIT-NEXT: slw 22, 12, 16 +; LE-32BIT-NEXT: or 23, 3, 22 +; LE-32BIT-NEXT: subfic 22, 30, 64 +; LE-32BIT-NEXT: stw 9, 60(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: mr 9, 10 +; LE-32BIT-NEXT: subfic 3, 22, 32 +; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 4, 8, 22 +; LE-32BIT-NEXT: slw 24, 9, 3 +; LE-32BIT-NEXT: or 4, 4, 24 +; LE-32BIT-NEXT: subfic 24, 30, 192 +; LE-32BIT-NEXT: subfic 27, 24, 32 +; LE-32BIT-NEXT: mr 10, 26 +; LE-32BIT-NEXT: slw 27, 6, 27 +; LE-32BIT-NEXT: srw 26, 5, 24 +; LE-32BIT-NEXT: stw 28, 24(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 27, 26, 27 +; LE-32BIT-NEXT: srw 26, 11, 22 +; LE-32BIT-NEXT: slw 28, 12, 3 +; LE-32BIT-NEXT: or 28, 26, 28 +; LE-32BIT-NEXT: srw 26, 5, 15 +; LE-32BIT-NEXT: slw 19, 6, 16 +; LE-32BIT-NEXT: or 26, 26, 19 +; LE-32BIT-NEXT: slw 19, 5, 10 +; LE-32BIT-NEXT: stw 7, 32(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: mr 7, 9 +; LE-32BIT-NEXT: or 19, 14, 19 +; LE-32BIT-NEXT: slw 14, 11, 31 +; LE-32BIT-NEXT: lwz 9, 64(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 18, 18, 14 +; LE-32BIT-NEXT: slw 3, 6, 3 +; LE-32BIT-NEXT: srw 14, 5, 22 +; LE-32BIT-NEXT: cmplwi 1, 30, 64 +; LE-32BIT-NEXT: cmplwi 30, 128 +; LE-32BIT-NEXT: srw 24, 6, 24 +; LE-32BIT-NEXT: or 10, 14, 3 +; LE-32BIT-NEXT: slw 14, 5, 31 +; LE-32BIT-NEXT: crnand 28, 0, 4 +; LE-32BIT-NEXT: slw 31, 5, 30 +; LE-32BIT-NEXT: or 24, 0, 24 +; LE-32BIT-NEXT: mr 3, 7 +; LE-32BIT-NEXT: stw 7, 28(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 0, 7, 22 +; LE-32BIT-NEXT: lwz 7, 24(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 17, 17, 14 +; LE-32BIT-NEXT: bc 12, 28, .LBB10_2 +; LE-32BIT-NEXT: # %bb.1: +; LE-32BIT-NEXT: ori 14, 31, 0 +; LE-32BIT-NEXT: b .LBB10_3 +; LE-32BIT-NEXT: .LBB10_2: +; LE-32BIT-NEXT: li 14, 0 +; LE-32BIT-NEXT: .LBB10_3: +; LE-32BIT-NEXT: or 20, 20, 0 +; LE-32BIT-NEXT: subfic 0, 15, 64 +; LE-32BIT-NEXT: stw 14, 28(9) +; LE-32BIT-NEXT: subfic 14, 0, 32 +; LE-32BIT-NEXT: srw 14, 11, 14 +; LE-32BIT-NEXT: slw 31, 12, 0 +; LE-32BIT-NEXT: or 14, 31, 14 +; LE-32BIT-NEXT: srw 31, 12, 7 +; LE-32BIT-NEXT: or 23, 23, 31 +; LE-32BIT-NEXT: srw 31, 3, 25 +; LE-32BIT-NEXT: lwz 3, 40(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 4, 4, 31 +; LE-32BIT-NEXT: slw 0, 11, 0 +; LE-32BIT-NEXT: cmplwi 3, 15, 0 +; LE-32BIT-NEXT: srw 31, 6, 3 +; LE-32BIT-NEXT: or 27, 27, 31 +; LE-32BIT-NEXT: srw 31, 12, 25 +; LE-32BIT-NEXT: or 28, 28, 31 +; LE-32BIT-NEXT: srw 31, 6, 7 +; LE-32BIT-NEXT: or 26, 26, 31 +; LE-32BIT-NEXT: srw 31, 6, 22 +; LE-32BIT-NEXT: or 18, 18, 31 +; LE-32BIT-NEXT: lwz 31, 36(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 25, 6, 25 +; LE-32BIT-NEXT: or 3, 10, 25 +; LE-32BIT-NEXT: or 26, 26, 0 +; LE-32BIT-NEXT: cmplwi 6, 31, 64 +; LE-32BIT-NEXT: bc 12, 24, .LBB10_5 +; LE-32BIT-NEXT: # %bb.4: +; LE-32BIT-NEXT: ori 25, 21, 0 +; LE-32BIT-NEXT: b .LBB10_6 +; LE-32BIT-NEXT: .LBB10_5: +; LE-32BIT-NEXT: addi 25, 24, 0 +; LE-32BIT-NEXT: .LBB10_6: +; LE-32BIT-NEXT: slw 24, 11, 16 +; LE-32BIT-NEXT: slw 0, 11, 30 +; LE-32BIT-NEXT: or 24, 14, 24 +; LE-32BIT-NEXT: lwz 14, 32(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 3, 0, 3 +; LE-32BIT-NEXT: bc 12, 28, .LBB10_8 +; LE-32BIT-NEXT: # %bb.7: +; LE-32BIT-NEXT: ori 0, 17, 0 +; LE-32BIT-NEXT: b .LBB10_9 +; LE-32BIT-NEXT: .LBB10_8: +; LE-32BIT-NEXT: li 0, 0 +; LE-32BIT-NEXT: .LBB10_9: +; LE-32BIT-NEXT: bc 12, 4, .LBB10_11 +; LE-32BIT-NEXT: # %bb.10: +; LE-32BIT-NEXT: ori 7, 29, 0 +; LE-32BIT-NEXT: b .LBB10_12 +; LE-32BIT-NEXT: .LBB10_11: +; LE-32BIT-NEXT: addi 7, 20, 0 +; LE-32BIT-NEXT: .LBB10_12: +; LE-32BIT-NEXT: srw 20, 12, 15 +; LE-32BIT-NEXT: stw 0, 24(9) +; LE-32BIT-NEXT: cmplwi 7, 15, 64 +; LE-32BIT-NEXT: srw 0, 6, 15 +; LE-32BIT-NEXT: li 15, 0 +; LE-32BIT-NEXT: mr 16, 9 +; LE-32BIT-NEXT: or 24, 0, 24 +; LE-32BIT-NEXT: lwz 9, 52(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: bc 12, 28, .LBB10_14 +; LE-32BIT-NEXT: # %bb.13: +; LE-32BIT-NEXT: ori 0, 15, 0 +; LE-32BIT-NEXT: b .LBB10_15 +; LE-32BIT-NEXT: .LBB10_14: +; LE-32BIT-NEXT: addi 0, 20, 0 +; LE-32BIT-NEXT: .LBB10_15: +; LE-32BIT-NEXT: slw 21, 14, 30 +; LE-32BIT-NEXT: lwz 20, 60(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 4, 21, 4 +; LE-32BIT-NEXT: slw 21, 11, 31 +; LE-32BIT-NEXT: cmplwi 5, 30, 0 +; LE-32BIT-NEXT: or 27, 21, 27 +; LE-32BIT-NEXT: bc 12, 4, .LBB10_17 +; LE-32BIT-NEXT: # %bb.16: +; LE-32BIT-NEXT: ori 21, 19, 0 +; LE-32BIT-NEXT: b .LBB10_18 +; LE-32BIT-NEXT: .LBB10_17: +; LE-32BIT-NEXT: addi 21, 18, 0 +; LE-32BIT-NEXT: .LBB10_18: +; LE-32BIT-NEXT: slw 19, 8, 9 +; LE-32BIT-NEXT: slw 17, 5, 9 +; LE-32BIT-NEXT: bc 12, 22, .LBB10_20 +; LE-32BIT-NEXT: # %bb.19: +; LE-32BIT-NEXT: ori 9, 7, 0 +; LE-32BIT-NEXT: b .LBB10_21 +; LE-32BIT-NEXT: .LBB10_20: +; LE-32BIT-NEXT: addi 9, 20, 0 +; LE-32BIT-NEXT: .LBB10_21: +; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: slw 30, 8, 30 +; LE-32BIT-NEXT: lwz 10, 56(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: slw 29, 5, 31 +; LE-32BIT-NEXT: or 9, 9, 0 +; LE-32BIT-NEXT: bc 12, 4, .LBB10_23 +; LE-32BIT-NEXT: # %bb.22: +; LE-32BIT-NEXT: ori 0, 15, 0 +; LE-32BIT-NEXT: b .LBB10_24 +; LE-32BIT-NEXT: .LBB10_23: +; LE-32BIT-NEXT: addi 0, 30, 0 +; LE-32BIT-NEXT: .LBB10_24: +; LE-32BIT-NEXT: bc 12, 24, .LBB10_26 +; LE-32BIT-NEXT: # %bb.25: +; LE-32BIT-NEXT: ori 30, 15, 0 +; LE-32BIT-NEXT: b .LBB10_27 +; LE-32BIT-NEXT: .LBB10_26: +; LE-32BIT-NEXT: addi 30, 29, 0 +; LE-32BIT-NEXT: .LBB10_27: +; LE-32BIT-NEXT: bc 12, 4, .LBB10_29 +; LE-32BIT-NEXT: # %bb.28: +; LE-32BIT-NEXT: ori 29, 15, 0 +; LE-32BIT-NEXT: b .LBB10_30 +; LE-32BIT-NEXT: .LBB10_29: +; LE-32BIT-NEXT: addi 29, 7, 0 +; LE-32BIT-NEXT: .LBB10_30: +; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: bc 12, 28, .LBB10_31 +; LE-32BIT-NEXT: b .LBB10_32 +; LE-32BIT-NEXT: .LBB10_31: +; LE-32BIT-NEXT: addi 28, 26, 0 +; LE-32BIT-NEXT: .LBB10_32: +; LE-32BIT-NEXT: bc 12, 4, .LBB10_34 +; LE-32BIT-NEXT: # %bb.33: +; LE-32BIT-NEXT: ori 3, 17, 0 +; LE-32BIT-NEXT: b .LBB10_34 +; LE-32BIT-NEXT: .LBB10_34: +; LE-32BIT-NEXT: srw 22, 12, 22 +; LE-32BIT-NEXT: slw 18, 5, 10 +; LE-32BIT-NEXT: bc 12, 4, .LBB10_36 +; LE-32BIT-NEXT: # %bb.35: +; LE-32BIT-NEXT: ori 4, 19, 0 +; LE-32BIT-NEXT: b .LBB10_36 +; LE-32BIT-NEXT: .LBB10_36: +; LE-32BIT-NEXT: bc 12, 14, .LBB10_38 +; LE-32BIT-NEXT: # %bb.37: +; LE-32BIT-NEXT: ori 5, 28, 0 +; LE-32BIT-NEXT: b .LBB10_38 +; LE-32BIT-NEXT: .LBB10_38: +; LE-32BIT-NEXT: li 28, 0 +; LE-32BIT-NEXT: bc 12, 22, .LBB10_39 +; LE-32BIT-NEXT: b .LBB10_40 +; LE-32BIT-NEXT: .LBB10_39: +; LE-32BIT-NEXT: addi 3, 11, 0 +; LE-32BIT-NEXT: .LBB10_40: +; LE-32BIT-NEXT: cmplwi 2, 31, 0 +; LE-32BIT-NEXT: bc 12, 24, .LBB10_42 +; LE-32BIT-NEXT: # %bb.41: +; LE-32BIT-NEXT: ori 27, 18, 0 +; LE-32BIT-NEXT: b .LBB10_42 +; LE-32BIT-NEXT: .LBB10_42: +; LE-32BIT-NEXT: bc 12, 28, .LBB10_44 +; LE-32BIT-NEXT: # %bb.43: +; LE-32BIT-NEXT: ori 26, 22, 0 +; LE-32BIT-NEXT: b .LBB10_45 +; LE-32BIT-NEXT: .LBB10_44: +; LE-32BIT-NEXT: addi 26, 24, 0 +; LE-32BIT-NEXT: .LBB10_45: +; LE-32BIT-NEXT: bc 12, 24, .LBB10_47 +; LE-32BIT-NEXT: # %bb.46: +; LE-32BIT-NEXT: ori 24, 15, 0 +; LE-32BIT-NEXT: b .LBB10_48 +; LE-32BIT-NEXT: .LBB10_47: +; LE-32BIT-NEXT: addi 24, 7, 0 +; LE-32BIT-NEXT: .LBB10_48: +; LE-32BIT-NEXT: bc 12, 28, .LBB10_50 +; LE-32BIT-NEXT: # %bb.49: +; LE-32BIT-NEXT: ori 7, 15, 0 +; LE-32BIT-NEXT: b .LBB10_51 +; LE-32BIT-NEXT: .LBB10_50: +; LE-32BIT-NEXT: addi 7, 23, 0 +; LE-32BIT-NEXT: .LBB10_51: +; LE-32BIT-NEXT: bc 12, 22, .LBB10_52 +; LE-32BIT-NEXT: b .LBB10_53 +; LE-32BIT-NEXT: .LBB10_52: +; LE-32BIT-NEXT: addi 4, 14, 0 +; LE-32BIT-NEXT: .LBB10_53: +; LE-32BIT-NEXT: bc 12, 0, .LBB10_55 +; LE-32BIT-NEXT: # %bb.54: +; LE-32BIT-NEXT: ori 3, 28, 0 +; LE-32BIT-NEXT: b .LBB10_55 +; LE-32BIT-NEXT: .LBB10_55: +; LE-32BIT-NEXT: bc 12, 10, .LBB10_56 +; LE-32BIT-NEXT: b .LBB10_57 +; LE-32BIT-NEXT: .LBB10_56: +; LE-32BIT-NEXT: addi 25, 12, 0 +; LE-32BIT-NEXT: .LBB10_57: +; LE-32BIT-NEXT: or 5, 0, 5 +; LE-32BIT-NEXT: bc 12, 10, .LBB10_58 +; LE-32BIT-NEXT: b .LBB10_59 +; LE-32BIT-NEXT: .LBB10_58: +; LE-32BIT-NEXT: addi 27, 11, 0 +; LE-32BIT-NEXT: .LBB10_59: +; LE-32BIT-NEXT: stw 3, 20(16) +; LE-32BIT-NEXT: or 3, 4, 7 +; LE-32BIT-NEXT: bc 12, 0, .LBB10_61 +; LE-32BIT-NEXT: # %bb.60: +; LE-32BIT-NEXT: ori 3, 27, 0 +; LE-32BIT-NEXT: ori 9, 25, 0 +; LE-32BIT-NEXT: b .LBB10_61 +; LE-32BIT-NEXT: .LBB10_61: +; LE-32BIT-NEXT: bc 12, 14, .LBB10_63 +; LE-32BIT-NEXT: # %bb.62: +; LE-32BIT-NEXT: ori 6, 26, 0 +; LE-32BIT-NEXT: b .LBB10_63 +; LE-32BIT-NEXT: .LBB10_63: +; LE-32BIT-NEXT: bc 12, 22, .LBB10_65 +; LE-32BIT-NEXT: # %bb.64: +; LE-32BIT-NEXT: ori 12, 21, 0 +; LE-32BIT-NEXT: b .LBB10_65 +; LE-32BIT-NEXT: .LBB10_65: +; LE-32BIT-NEXT: bc 12, 0, .LBB10_67 +; LE-32BIT-NEXT: # %bb.66: +; LE-32BIT-NEXT: ori 5, 30, 0 +; LE-32BIT-NEXT: b .LBB10_67 +; LE-32BIT-NEXT: .LBB10_67: +; LE-32BIT-NEXT: bc 12, 22, .LBB10_69 +; LE-32BIT-NEXT: # %bb.68: +; LE-32BIT-NEXT: ori 4, 9, 0 +; LE-32BIT-NEXT: b .LBB10_70 +; LE-32BIT-NEXT: .LBB10_69: +; LE-32BIT-NEXT: addi 3, 14, 0 +; LE-32BIT-NEXT: addi 4, 20, 0 +; LE-32BIT-NEXT: .LBB10_70: +; LE-32BIT-NEXT: bc 12, 0, .LBB10_72 +; LE-32BIT-NEXT: # %bb.71: +; LE-32BIT-NEXT: ori 12, 15, 0 +; LE-32BIT-NEXT: b .LBB10_72 +; LE-32BIT-NEXT: .LBB10_72: +; LE-32BIT-NEXT: bc 12, 22, .LBB10_73 +; LE-32BIT-NEXT: b .LBB10_74 +; LE-32BIT-NEXT: .LBB10_73: +; LE-32BIT-NEXT: addi 5, 8, 0 +; LE-32BIT-NEXT: .LBB10_74: +; LE-32BIT-NEXT: stw 3, 4(16) +; LE-32BIT-NEXT: lwz 3, 28(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: stw 4, 0(16) +; LE-32BIT-NEXT: or 4, 29, 6 +; LE-32BIT-NEXT: bc 12, 0, .LBB10_76 +; LE-32BIT-NEXT: # %bb.75: +; LE-32BIT-NEXT: ori 4, 24, 0 +; LE-32BIT-NEXT: b .LBB10_76 +; LE-32BIT-NEXT: .LBB10_76: +; LE-32BIT-NEXT: stw 12, 16(16) +; LE-32BIT-NEXT: bc 12, 22, .LBB10_78 +; LE-32BIT-NEXT: # %bb.77: +; LE-32BIT-NEXT: ori 3, 4, 0 +; LE-32BIT-NEXT: b .LBB10_78 +; LE-32BIT-NEXT: .LBB10_78: +; LE-32BIT-NEXT: stw 5, 12(16) +; LE-32BIT-NEXT: stw 3, 8(16) +; LE-32BIT-NEXT: lwz 12, 68(1) +; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 +; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 +; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: blr + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = shl i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; LE-64BIT-LABEL: ashr_32bytes: +; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: ld 7, 16(3) +; LE-64BIT-NEXT: ld 8, 24(3) +; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: subfic 9, 4, 192 +; LE-64BIT-NEXT: addi 10, 4, -128 +; LE-64BIT-NEXT: addi 0, 4, -192 +; LE-64BIT-NEXT: subfic 29, 4, 64 +; LE-64BIT-NEXT: ld 6, 0(3) +; LE-64BIT-NEXT: srd 12, 7, 4 +; LE-64BIT-NEXT: sld 9, 8, 9 +; LE-64BIT-NEXT: addi 28, 4, -64 +; LE-64BIT-NEXT: ld 3, 8(3) +; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill +; LE-64BIT-NEXT: srd 30, 7, 10 +; LE-64BIT-NEXT: srad 27, 8, 0 +; LE-64BIT-NEXT: cmpwi 0, 1 +; LE-64BIT-NEXT: sld 0, 8, 29 +; LE-64BIT-NEXT: or 9, 30, 9 +; LE-64BIT-NEXT: subfic 30, 4, 128 +; LE-64BIT-NEXT: srad 26, 8, 28 +; LE-64BIT-NEXT: cmpwi 1, 28, 1 +; LE-64BIT-NEXT: or 12, 12, 0 +; LE-64BIT-NEXT: subfic 25, 30, 64 +; LE-64BIT-NEXT: srd 11, 6, 4 +; LE-64BIT-NEXT: isel 12, 12, 26, 4 +; LE-64BIT-NEXT: sld 26, 3, 29 +; LE-64BIT-NEXT: srd 28, 3, 28 +; LE-64BIT-NEXT: or 11, 11, 26 +; LE-64BIT-NEXT: sld 29, 7, 29 +; LE-64BIT-NEXT: srd 26, 7, 25 +; LE-64BIT-NEXT: sld 7, 7, 30 +; LE-64BIT-NEXT: or 11, 11, 28 +; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: sld 30, 8, 30 +; LE-64BIT-NEXT: isellt 9, 9, 27 +; LE-64BIT-NEXT: or 7, 11, 7 +; LE-64BIT-NEXT: cmplwi 4, 128 +; LE-64BIT-NEXT: sradi 27, 8, 63 +; LE-64BIT-NEXT: or 30, 30, 26 +; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: srd 0, 3, 4 +; LE-64BIT-NEXT: isellt 11, 12, 27 +; LE-64BIT-NEXT: or 12, 30, 29 +; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: cmplwi 1, 4, 0 +; LE-64BIT-NEXT: srad 10, 8, 10 +; LE-64BIT-NEXT: std 11, 16(5) +; LE-64BIT-NEXT: isellt 7, 7, 9 +; LE-64BIT-NEXT: or 9, 0, 12 +; LE-64BIT-NEXT: isel 6, 6, 7, 6 +; LE-64BIT-NEXT: srad 4, 8, 4 +; LE-64BIT-NEXT: isellt 7, 9, 10 +; LE-64BIT-NEXT: std 6, 0(5) +; LE-64BIT-NEXT: isel 3, 3, 7, 6 +; LE-64BIT-NEXT: isellt 4, 4, 27 +; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; LE-64BIT-NEXT: std 3, 8(5) +; LE-64BIT-NEXT: std 4, 24(5) +; LE-64BIT-NEXT: blr +; +; BE-LABEL: ashr_32bytes: +; BE: # %bb.0: +; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: ld 6, 24(3) +; BE-NEXT: ld 7, 16(3) +; BE-NEXT: ld 8, 8(3) +; BE-NEXT: ld 3, 0(3) +; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill +; BE-NEXT: subfic 9, 4, 192 +; BE-NEXT: addi 10, 4, -128 +; BE-NEXT: addi 11, 4, -192 +; BE-NEXT: subfic 0, 4, 64 +; BE-NEXT: sld 9, 3, 9 +; BE-NEXT: srd 27, 8, 10 +; BE-NEXT: srd 12, 6, 4 +; BE-NEXT: subfic 29, 4, 128 +; BE-NEXT: cmpwi 11, 1 +; BE-NEXT: srad 11, 3, 11 +; BE-NEXT: or 9, 27, 9 +; BE-NEXT: sld 27, 7, 0 +; BE-NEXT: addi 30, 4, -64 +; BE-NEXT: srd 28, 8, 4 +; BE-NEXT: or 12, 12, 27 +; BE-NEXT: sld 27, 3, 0 +; BE-NEXT: bc 12, 0, .LBB11_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: ori 9, 11, 0 +; BE-NEXT: b .LBB11_2 +; BE-NEXT: .LBB11_2: +; BE-NEXT: subfic 11, 29, 64 +; BE-NEXT: or 28, 28, 27 +; BE-NEXT: srd 27, 7, 30 +; BE-NEXT: sld 0, 8, 0 +; BE-NEXT: srd 11, 8, 11 +; BE-NEXT: sld 8, 8, 29 +; BE-NEXT: sld 29, 3, 29 +; BE-NEXT: cmplwi 4, 128 +; BE-NEXT: or 12, 12, 27 +; BE-NEXT: or 11, 29, 11 +; BE-NEXT: or 8, 12, 8 +; BE-NEXT: srd 12, 7, 4 +; BE-NEXT: or 11, 11, 0 +; BE-NEXT: cmpwi 1, 30, 1 +; BE-NEXT: srad 30, 3, 30 +; BE-NEXT: bc 12, 0, .LBB11_4 +; BE-NEXT: # %bb.3: +; BE-NEXT: ori 8, 9, 0 +; BE-NEXT: b .LBB11_4 +; BE-NEXT: .LBB11_4: +; BE-NEXT: or 9, 12, 11 +; BE-NEXT: srad 10, 3, 10 +; BE-NEXT: bc 12, 4, .LBB11_6 +; BE-NEXT: # %bb.5: +; BE-NEXT: ori 11, 30, 0 +; BE-NEXT: b .LBB11_7 +; BE-NEXT: .LBB11_6: +; BE-NEXT: addi 11, 28, 0 +; BE-NEXT: .LBB11_7: +; BE-NEXT: cmplwi 1, 4, 0 +; BE-NEXT: bc 12, 0, .LBB11_9 +; BE-NEXT: # %bb.8: +; BE-NEXT: ori 9, 10, 0 +; BE-NEXT: b .LBB11_9 +; BE-NEXT: .LBB11_9: +; BE-NEXT: sradi 10, 3, 63 +; BE-NEXT: srad 3, 3, 4 +; BE-NEXT: bc 12, 6, .LBB11_11 +; BE-NEXT: # %bb.10: +; BE-NEXT: ori 4, 8, 0 +; BE-NEXT: b .LBB11_12 +; BE-NEXT: .LBB11_11: +; BE-NEXT: addi 4, 6, 0 +; BE-NEXT: .LBB11_12: +; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; BE-NEXT: bc 12, 6, .LBB11_14 +; BE-NEXT: # %bb.13: +; BE-NEXT: ori 6, 9, 0 +; BE-NEXT: b .LBB11_15 +; BE-NEXT: .LBB11_14: +; BE-NEXT: addi 6, 7, 0 +; BE-NEXT: .LBB11_15: +; BE-NEXT: bc 12, 0, .LBB11_17 +; BE-NEXT: # %bb.16: +; BE-NEXT: ori 7, 10, 0 +; BE-NEXT: ori 3, 10, 0 +; BE-NEXT: b .LBB11_18 +; BE-NEXT: .LBB11_17: +; BE-NEXT: addi 7, 11, 0 +; BE-NEXT: .LBB11_18: +; BE-NEXT: std 4, 24(5) +; BE-NEXT: std 3, 0(5) +; BE-NEXT: std 7, 8(5) +; BE-NEXT: std 6, 16(5) +; BE-NEXT: blr +; +; LE-32BIT-LABEL: ashr_32bytes: +; LE-32BIT: # %bb.0: +; LE-32BIT-NEXT: stwu 1, -160(1) +; LE-32BIT-NEXT: mfcr 12 +; LE-32BIT-NEXT: stw 14, 88(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 15, 92(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 16, 96(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 17, 100(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 18, 104(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 19, 108(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 20, 112(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 21, 116(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 22, 120(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 23, 124(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 24, 128(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 25, 132(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 26, 136(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 27, 140(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 28, 144(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 29, 148(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 30, 152(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 31, 156(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 12, 84(1) +; LE-32BIT-NEXT: lwz 30, 28(4) +; LE-32BIT-NEXT: lwz 10, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: subfic 23, 30, 224 +; LE-32BIT-NEXT: stw 5, 80(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: addi 21, 30, -224 +; LE-32BIT-NEXT: lwz 5, 24(3) +; LE-32BIT-NEXT: subfic 4, 30, 160 +; LE-32BIT-NEXT: lwz 8, 28(3) +; LE-32BIT-NEXT: addi 0, 30, -128 +; LE-32BIT-NEXT: lwz 12, 20(3) +; LE-32BIT-NEXT: subfic 28, 30, 96 +; LE-32BIT-NEXT: lwz 9, 16(3) +; LE-32BIT-NEXT: addi 29, 30, -64 +; LE-32BIT-NEXT: lwz 27, 12(3) +; LE-32BIT-NEXT: subfic 25, 30, 32 +; LE-32BIT-NEXT: lwz 11, 8(3) +; LE-32BIT-NEXT: addi 3, 30, -192 +; LE-32BIT-NEXT: slw 23, 6, 23 +; LE-32BIT-NEXT: srw 16, 10, 3 +; LE-32BIT-NEXT: stw 3, 72(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 20, 8, 30 +; LE-32BIT-NEXT: sraw 15, 6, 21 +; LE-32BIT-NEXT: cmpwi 21, 1 +; LE-32BIT-NEXT: slw 21, 11, 4 +; LE-32BIT-NEXT: srw 14, 27, 0 +; LE-32BIT-NEXT: slw 31, 9, 28 +; LE-32BIT-NEXT: srw 3, 12, 29 +; LE-32BIT-NEXT: or 23, 16, 23 +; LE-32BIT-NEXT: slw 16, 5, 25 +; LE-32BIT-NEXT: srw 19, 12, 30 +; LE-32BIT-NEXT: or 21, 14, 21 +; LE-32BIT-NEXT: slw 14, 9, 25 +; LE-32BIT-NEXT: or 3, 3, 31 +; LE-32BIT-NEXT: slw 31, 6, 4 +; LE-32BIT-NEXT: or 20, 20, 16 +; LE-32BIT-NEXT: srw 16, 10, 0 +; LE-32BIT-NEXT: or 19, 19, 14 +; LE-32BIT-NEXT: slw 14, 6, 28 +; LE-32BIT-NEXT: or 16, 16, 31 +; LE-32BIT-NEXT: srw 31, 10, 29 +; LE-32BIT-NEXT: addi 24, 30, -160 +; LE-32BIT-NEXT: srw 18, 27, 30 +; LE-32BIT-NEXT: or 14, 31, 14 +; LE-32BIT-NEXT: slw 31, 11, 25 +; LE-32BIT-NEXT: addi 7, 30, -96 +; LE-32BIT-NEXT: srw 17, 10, 30 +; LE-32BIT-NEXT: stw 4, 48(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 18, 18, 31 +; LE-32BIT-NEXT: slw 31, 6, 25 +; LE-32BIT-NEXT: bc 12, 0, .LBB11_2 +; LE-32BIT-NEXT: # %bb.1: +; LE-32BIT-NEXT: ori 4, 15, 0 +; LE-32BIT-NEXT: b .LBB11_3 +; LE-32BIT-NEXT: .LBB11_2: +; LE-32BIT-NEXT: addi 4, 23, 0 +; LE-32BIT-NEXT: .LBB11_3: +; LE-32BIT-NEXT: srw 15, 11, 24 +; LE-32BIT-NEXT: or 17, 17, 31 +; LE-32BIT-NEXT: addi 31, 30, -32 +; LE-32BIT-NEXT: or 21, 21, 15 +; LE-32BIT-NEXT: srw 15, 9, 7 +; LE-32BIT-NEXT: or 3, 3, 15 +; LE-32BIT-NEXT: srw 15, 5, 31 +; LE-32BIT-NEXT: or 20, 20, 15 +; LE-32BIT-NEXT: srw 15, 9, 31 +; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 3, 19, 15 +; LE-32BIT-NEXT: subfic 15, 30, 64 +; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: cmpwi 24, 1 +; LE-32BIT-NEXT: sraw 24, 6, 24 +; LE-32BIT-NEXT: subfic 4, 15, 32 +; LE-32BIT-NEXT: stw 0, 56(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: srw 0, 27, 4 +; LE-32BIT-NEXT: stw 3, 64(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: bc 12, 0, .LBB11_5 +; LE-32BIT-NEXT: # %bb.4: +; LE-32BIT-NEXT: ori 3, 24, 0 +; LE-32BIT-NEXT: b .LBB11_6 +; LE-32BIT-NEXT: .LBB11_5: +; LE-32BIT-NEXT: addi 3, 16, 0 +; LE-32BIT-NEXT: .LBB11_6: +; LE-32BIT-NEXT: slw 16, 11, 15 +; LE-32BIT-NEXT: or 0, 16, 0 +; LE-32BIT-NEXT: subfic 16, 30, 128 +; LE-32BIT-NEXT: stw 5, 52(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: subfic 5, 16, 32 +; LE-32BIT-NEXT: stw 3, 60(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 3, 6, 16 +; LE-32BIT-NEXT: srw 22, 10, 5 +; LE-32BIT-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 29, 3, 22 +; LE-32BIT-NEXT: subfic 3, 30, 192 +; LE-32BIT-NEXT: stw 8, 76(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: mr 8, 12 +; LE-32BIT-NEXT: mr 23, 9 +; LE-32BIT-NEXT: mr 9, 27 +; LE-32BIT-NEXT: slw 22, 11, 16 +; LE-32BIT-NEXT: srw 27, 27, 5 +; LE-32BIT-NEXT: subfic 19, 3, 32 +; LE-32BIT-NEXT: mr 12, 28 +; LE-32BIT-NEXT: or 27, 22, 27 +; LE-32BIT-NEXT: slw 22, 23, 15 +; LE-32BIT-NEXT: srw 26, 8, 4 +; LE-32BIT-NEXT: srw 19, 10, 19 +; LE-32BIT-NEXT: slw 24, 6, 3 +; LE-32BIT-NEXT: srw 4, 10, 4 +; LE-32BIT-NEXT: slw 28, 6, 15 +; LE-32BIT-NEXT: or 26, 22, 26 +; LE-32BIT-NEXT: cmpwi 7, 1 +; LE-32BIT-NEXT: sraw 22, 6, 7 +; LE-32BIT-NEXT: or 24, 24, 19 +; LE-32BIT-NEXT: srw 19, 11, 31 +; LE-32BIT-NEXT: mr 7, 11 +; LE-32BIT-NEXT: or 11, 28, 4 +; LE-32BIT-NEXT: lwz 4, 80(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: bc 12, 0, .LBB11_7 +; LE-32BIT-NEXT: b .LBB11_8 +; LE-32BIT-NEXT: .LBB11_7: +; LE-32BIT-NEXT: addi 22, 14, 0 +; LE-32BIT-NEXT: .LBB11_8: +; LE-32BIT-NEXT: cmplwi 1, 30, 64 +; LE-32BIT-NEXT: cmplwi 30, 128 +; LE-32BIT-NEXT: slw 3, 10, 3 +; LE-32BIT-NEXT: or 19, 18, 19 +; LE-32BIT-NEXT: cmpwi 5, 31, 1 +; LE-32BIT-NEXT: sraw 18, 6, 31 +; LE-32BIT-NEXT: crand 28, 0, 4 +; LE-32BIT-NEXT: srawi 14, 6, 31 +; LE-32BIT-NEXT: sraw 31, 6, 30 +; LE-32BIT-NEXT: or 3, 21, 3 +; LE-32BIT-NEXT: slw 21, 8, 15 +; LE-32BIT-NEXT: bc 12, 20, .LBB11_10 +; LE-32BIT-NEXT: # %bb.9: +; LE-32BIT-NEXT: ori 28, 18, 0 +; LE-32BIT-NEXT: b .LBB11_11 +; LE-32BIT-NEXT: .LBB11_10: +; LE-32BIT-NEXT: addi 28, 17, 0 +; LE-32BIT-NEXT: .LBB11_11: +; LE-32BIT-NEXT: bc 12, 28, .LBB11_13 +; LE-32BIT-NEXT: # %bb.12: +; LE-32BIT-NEXT: ori 18, 14, 0 +; LE-32BIT-NEXT: b .LBB11_14 +; LE-32BIT-NEXT: .LBB11_13: +; LE-32BIT-NEXT: addi 18, 31, 0 +; LE-32BIT-NEXT: .LBB11_14: +; LE-32BIT-NEXT: or 21, 20, 21 +; LE-32BIT-NEXT: subfic 20, 16, 64 +; LE-32BIT-NEXT: stw 18, 0(4) +; LE-32BIT-NEXT: subfic 18, 20, 32 +; LE-32BIT-NEXT: slw 18, 7, 18 +; LE-32BIT-NEXT: srw 17, 9, 20 +; LE-32BIT-NEXT: or 18, 17, 18 +; LE-32BIT-NEXT: slw 17, 9, 25 +; LE-32BIT-NEXT: mr 31, 8 +; LE-32BIT-NEXT: stw 8, 40(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 8, 0, 17 +; LE-32BIT-NEXT: slw 0, 10, 12 +; LE-32BIT-NEXT: stw 8, 28(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: or 8, 29, 0 +; LE-32BIT-NEXT: slw 0, 9, 12 +; LE-32BIT-NEXT: or 12, 27, 0 +; LE-32BIT-NEXT: stw 12, 32(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: slw 0, 31, 25 +; LE-32BIT-NEXT: lwz 12, 48(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 0, 26, 0 +; LE-32BIT-NEXT: mr 17, 10 +; LE-32BIT-NEXT: slw 25, 10, 25 +; LE-32BIT-NEXT: slw 26, 10, 12 +; LE-32BIT-NEXT: or 26, 24, 26 +; LE-32BIT-NEXT: slw 24, 10, 15 +; LE-32BIT-NEXT: or 24, 19, 24 +; LE-32BIT-NEXT: lwz 19, 56(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 11, 11, 25 +; LE-32BIT-NEXT: lwz 10, 36(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 25, 7, 20 +; LE-32BIT-NEXT: cmplwi 6, 19, 64 +; LE-32BIT-NEXT: or 8, 8, 25 +; LE-32BIT-NEXT: bc 12, 24, .LBB11_16 +; LE-32BIT-NEXT: # %bb.15: +; LE-32BIT-NEXT: ori 27, 10, 0 +; LE-32BIT-NEXT: b .LBB11_17 +; LE-32BIT-NEXT: .LBB11_16: +; LE-32BIT-NEXT: addi 27, 3, 0 +; LE-32BIT-NEXT: .LBB11_17: +; LE-32BIT-NEXT: lwz 10, 52(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 5, 7, 5 +; LE-32BIT-NEXT: lwz 3, 44(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: or 5, 18, 5 +; LE-32BIT-NEXT: srw 25, 10, 30 +; LE-32BIT-NEXT: or 25, 25, 0 +; LE-32BIT-NEXT: srw 0, 7, 19 +; LE-32BIT-NEXT: or 26, 0, 26 +; LE-32BIT-NEXT: srw 0, 7, 30 +; LE-32BIT-NEXT: bc 12, 4, .LBB11_19 +; LE-32BIT-NEXT: # %bb.18: +; LE-32BIT-NEXT: ori 29, 3, 0 +; LE-32BIT-NEXT: b .LBB11_20 +; LE-32BIT-NEXT: .LBB11_19: +; LE-32BIT-NEXT: addi 29, 21, 0 +; LE-32BIT-NEXT: .LBB11_20: +; LE-32BIT-NEXT: mr 3, 7 +; LE-32BIT-NEXT: or 11, 0, 11 +; LE-32BIT-NEXT: bc 12, 28, .LBB11_22 +; LE-32BIT-NEXT: # %bb.21: +; LE-32BIT-NEXT: ori 0, 14, 0 +; LE-32BIT-NEXT: b .LBB11_23 +; LE-32BIT-NEXT: .LBB11_22: +; LE-32BIT-NEXT: addi 0, 28, 0 +; LE-32BIT-NEXT: .LBB11_23: +; LE-32BIT-NEXT: lwz 7, 72(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: mr 18, 4 +; LE-32BIT-NEXT: stw 0, 4(4) +; LE-32BIT-NEXT: bc 12, 4, .LBB11_25 +; LE-32BIT-NEXT: # %bb.24: +; LE-32BIT-NEXT: ori 24, 22, 0 +; LE-32BIT-NEXT: b .LBB11_25 +; LE-32BIT-NEXT: .LBB11_25: +; LE-32BIT-NEXT: cmplwi 5, 30, 0 +; LE-32BIT-NEXT: lwz 4, 68(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: cmplwi 2, 19, 0 +; LE-32BIT-NEXT: mr 31, 23 +; LE-32BIT-NEXT: srw 30, 23, 30 +; LE-32BIT-NEXT: slw 28, 9, 16 +; LE-32BIT-NEXT: slw 23, 9, 15 +; LE-32BIT-NEXT: sraw 21, 6, 7 +; LE-32BIT-NEXT: bc 12, 10, .LBB11_27 +; LE-32BIT-NEXT: # %bb.26: +; LE-32BIT-NEXT: ori 7, 27, 0 +; LE-32BIT-NEXT: b .LBB11_28 +; LE-32BIT-NEXT: .LBB11_27: +; LE-32BIT-NEXT: addi 7, 9, 0 +; LE-32BIT-NEXT: .LBB11_28: +; LE-32BIT-NEXT: bc 12, 22, .LBB11_30 +; LE-32BIT-NEXT: # %bb.29: +; LE-32BIT-NEXT: ori 12, 24, 0 +; LE-32BIT-NEXT: b .LBB11_31 +; LE-32BIT-NEXT: .LBB11_30: +; LE-32BIT-NEXT: addi 12, 9, 0 +; LE-32BIT-NEXT: .LBB11_31: +; LE-32BIT-NEXT: lwz 9, 64(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: srw 22, 31, 4 +; LE-32BIT-NEXT: sraw 20, 6, 4 +; LE-32BIT-NEXT: lwz 4, 28(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: cmplwi 7, 16, 64 +; LE-32BIT-NEXT: cmplwi 3, 16, 0 +; LE-32BIT-NEXT: slw 0, 17, 16 +; LE-32BIT-NEXT: lwz 16, 76(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: li 15, 0 +; LE-32BIT-NEXT: or 5, 0, 5 +; LE-32BIT-NEXT: bc 12, 28, .LBB11_33 +; LE-32BIT-NEXT: # %bb.32: +; LE-32BIT-NEXT: ori 0, 15, 0 +; LE-32BIT-NEXT: b .LBB11_34 +; LE-32BIT-NEXT: .LBB11_33: +; LE-32BIT-NEXT: addi 0, 28, 0 +; LE-32BIT-NEXT: .LBB11_34: +; LE-32BIT-NEXT: bc 12, 4, .LBB11_36 +; LE-32BIT-NEXT: # %bb.35: +; LE-32BIT-NEXT: ori 28, 22, 0 +; LE-32BIT-NEXT: ori 25, 15, 0 +; LE-32BIT-NEXT: b .LBB11_37 +; LE-32BIT-NEXT: .LBB11_36: +; LE-32BIT-NEXT: addi 28, 25, 0 +; LE-32BIT-NEXT: addi 25, 9, 0 +; LE-32BIT-NEXT: .LBB11_37: +; LE-32BIT-NEXT: lwz 9, 60(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: bc 12, 28, .LBB11_39 +; LE-32BIT-NEXT: # %bb.38: +; LE-32BIT-NEXT: ori 8, 4, 0 +; LE-32BIT-NEXT: b .LBB11_39 +; LE-32BIT-NEXT: .LBB11_39: +; LE-32BIT-NEXT: bc 12, 4, .LBB11_41 +; LE-32BIT-NEXT: # %bb.40: +; LE-32BIT-NEXT: ori 4, 20, 0 +; LE-32BIT-NEXT: b .LBB11_42 +; LE-32BIT-NEXT: .LBB11_41: +; LE-32BIT-NEXT: addi 4, 11, 0 +; LE-32BIT-NEXT: .LBB11_42: +; LE-32BIT-NEXT: bc 12, 22, .LBB11_43 +; LE-32BIT-NEXT: b .LBB11_44 +; LE-32BIT-NEXT: .LBB11_43: +; LE-32BIT-NEXT: addi 29, 16, 0 +; LE-32BIT-NEXT: .LBB11_44: +; LE-32BIT-NEXT: sraw 19, 6, 19 +; LE-32BIT-NEXT: bc 12, 22, .LBB11_45 +; LE-32BIT-NEXT: b .LBB11_46 +; LE-32BIT-NEXT: .LBB11_45: +; LE-32BIT-NEXT: addi 4, 3, 0 +; LE-32BIT-NEXT: .LBB11_46: +; LE-32BIT-NEXT: or 29, 29, 0 +; LE-32BIT-NEXT: bc 12, 4, .LBB11_48 +; LE-32BIT-NEXT: # %bb.47: +; LE-32BIT-NEXT: ori 0, 15, 0 +; LE-32BIT-NEXT: b .LBB11_49 +; LE-32BIT-NEXT: .LBB11_48: +; LE-32BIT-NEXT: addi 0, 30, 0 +; LE-32BIT-NEXT: .LBB11_49: +; LE-32BIT-NEXT: bc 12, 14, .LBB11_51 +; LE-32BIT-NEXT: # %bb.50: +; LE-32BIT-NEXT: ori 6, 8, 0 +; LE-32BIT-NEXT: b .LBB11_51 +; LE-32BIT-NEXT: .LBB11_51: +; LE-32BIT-NEXT: bc 12, 0, .LBB11_53 +; LE-32BIT-NEXT: # %bb.52: +; LE-32BIT-NEXT: ori 4, 14, 0 +; LE-32BIT-NEXT: b .LBB11_53 +; LE-32BIT-NEXT: .LBB11_53: +; LE-32BIT-NEXT: bc 12, 24, .LBB11_55 +; LE-32BIT-NEXT: # %bb.54: +; LE-32BIT-NEXT: ori 30, 14, 0 +; LE-32BIT-NEXT: ori 26, 21, 0 +; LE-32BIT-NEXT: b .LBB11_56 +; LE-32BIT-NEXT: .LBB11_55: +; LE-32BIT-NEXT: addi 30, 19, 0 +; LE-32BIT-NEXT: .LBB11_56: +; LE-32BIT-NEXT: bc 12, 28, .LBB11_58 +; LE-32BIT-NEXT: # %bb.57: +; LE-32BIT-NEXT: ori 5, 23, 0 +; LE-32BIT-NEXT: b .LBB11_58 +; LE-32BIT-NEXT: .LBB11_58: +; LE-32BIT-NEXT: bc 12, 22, .LBB11_60 +; LE-32BIT-NEXT: # %bb.59: +; LE-32BIT-NEXT: ori 8, 28, 0 +; LE-32BIT-NEXT: b .LBB11_61 +; LE-32BIT-NEXT: .LBB11_60: +; LE-32BIT-NEXT: addi 8, 10, 0 +; LE-32BIT-NEXT: .LBB11_61: +; LE-32BIT-NEXT: bc 12, 0, .LBB11_63 +; LE-32BIT-NEXT: # %bb.62: +; LE-32BIT-NEXT: ori 12, 14, 0 +; LE-32BIT-NEXT: b .LBB11_63 +; LE-32BIT-NEXT: .LBB11_63: +; LE-32BIT-NEXT: bc 12, 24, .LBB11_65 +; LE-32BIT-NEXT: # %bb.64: +; LE-32BIT-NEXT: ori 24, 14, 0 +; LE-32BIT-NEXT: b .LBB11_66 +; LE-32BIT-NEXT: .LBB11_65: +; LE-32BIT-NEXT: addi 24, 9, 0 +; LE-32BIT-NEXT: .LBB11_66: +; LE-32BIT-NEXT: lwz 9, 32(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: bc 12, 10, .LBB11_68 +; LE-32BIT-NEXT: # %bb.67: +; LE-32BIT-NEXT: ori 28, 26, 0 +; LE-32BIT-NEXT: b .LBB11_69 +; LE-32BIT-NEXT: .LBB11_68: +; LE-32BIT-NEXT: addi 28, 3, 0 +; LE-32BIT-NEXT: .LBB11_69: +; LE-32BIT-NEXT: bc 12, 0, .LBB11_71 +; LE-32BIT-NEXT: # %bb.70: +; LE-32BIT-NEXT: ori 3, 7, 0 +; LE-32BIT-NEXT: b .LBB11_72 +; LE-32BIT-NEXT: .LBB11_71: +; LE-32BIT-NEXT: addi 3, 29, 0 +; LE-32BIT-NEXT: .LBB11_72: +; LE-32BIT-NEXT: bc 12, 14, .LBB11_73 +; LE-32BIT-NEXT: b .LBB11_74 +; LE-32BIT-NEXT: .LBB11_73: +; LE-32BIT-NEXT: addi 5, 17, 0 +; LE-32BIT-NEXT: .LBB11_74: +; LE-32BIT-NEXT: stw 4, 8(18) +; LE-32BIT-NEXT: or 4, 0, 6 +; LE-32BIT-NEXT: bc 12, 0, .LBB11_76 +; LE-32BIT-NEXT: # %bb.75: +; LE-32BIT-NEXT: ori 4, 30, 0 +; LE-32BIT-NEXT: b .LBB11_76 +; LE-32BIT-NEXT: .LBB11_76: +; LE-32BIT-NEXT: bc 12, 28, .LBB11_78 +; LE-32BIT-NEXT: # %bb.77: +; LE-32BIT-NEXT: ori 27, 15, 0 +; LE-32BIT-NEXT: b .LBB11_79 +; LE-32BIT-NEXT: .LBB11_78: +; LE-32BIT-NEXT: addi 27, 9, 0 +; LE-32BIT-NEXT: .LBB11_79: +; LE-32BIT-NEXT: bc 12, 22, .LBB11_80 +; LE-32BIT-NEXT: b .LBB11_81 +; LE-32BIT-NEXT: .LBB11_80: +; LE-32BIT-NEXT: addi 3, 16, 0 +; LE-32BIT-NEXT: .LBB11_81: +; LE-32BIT-NEXT: stw 12, 12(18) +; LE-32BIT-NEXT: bc 12, 22, .LBB11_82 +; LE-32BIT-NEXT: b .LBB11_83 +; LE-32BIT-NEXT: .LBB11_82: +; LE-32BIT-NEXT: addi 4, 31, 0 +; LE-32BIT-NEXT: .LBB11_83: +; LE-32BIT-NEXT: or 7, 8, 27 +; LE-32BIT-NEXT: stw 4, 16(18) +; LE-32BIT-NEXT: bc 12, 0, .LBB11_85 +; LE-32BIT-NEXT: # %bb.84: +; LE-32BIT-NEXT: ori 6, 28, 0 +; LE-32BIT-NEXT: b .LBB11_86 +; LE-32BIT-NEXT: .LBB11_85: +; LE-32BIT-NEXT: addi 6, 7, 0 +; LE-32BIT-NEXT: .LBB11_86: +; LE-32BIT-NEXT: lwz 4, 40(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: stw 3, 28(18) +; LE-32BIT-NEXT: or 3, 25, 5 +; LE-32BIT-NEXT: bc 12, 0, .LBB11_88 +; LE-32BIT-NEXT: # %bb.87: +; LE-32BIT-NEXT: ori 3, 24, 0 +; LE-32BIT-NEXT: b .LBB11_88 +; LE-32BIT-NEXT: .LBB11_88: +; LE-32BIT-NEXT: bc 12, 22, .LBB11_90 +; LE-32BIT-NEXT: # %bb.89: +; LE-32BIT-NEXT: ori 5, 6, 0 +; LE-32BIT-NEXT: b .LBB11_91 +; LE-32BIT-NEXT: .LBB11_90: +; LE-32BIT-NEXT: addi 5, 10, 0 +; LE-32BIT-NEXT: addi 3, 4, 0 +; LE-32BIT-NEXT: .LBB11_91: +; LE-32BIT-NEXT: stw 5, 24(18) +; LE-32BIT-NEXT: stw 3, 20(18) +; LE-32BIT-NEXT: lwz 12, 84(1) +; LE-32BIT-NEXT: lwz 31, 156(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 +; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 +; LE-32BIT-NEXT: lwz 30, 152(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 148(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 144(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 140(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 26, 136(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 25, 132(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 24, 128(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 23, 124(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 22, 120(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 21, 116(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 20, 112(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 19, 108(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 18, 104(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 17, 100(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 16, 96(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 15, 92(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 14, 88(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 160 +; LE-32BIT-NEXT: blr + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = ashr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; LE: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll new file mode 100644 index 0000000..5ae6230 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -0,0 +1,4168 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ALL,RV64I +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ALL,RV32I + +define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: lshr_4bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: lshr_4bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: ret + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = lshr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: shl_4bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: shl_4bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: ret + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = shl i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: ashr_4bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: sraw a0, a0, a1 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: ashr_4bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sra a0, a0, a1 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: ret + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = ashr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: lshr_8bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 1(a1) +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 2(a1) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: lbu a5, 4(a1) +; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 6(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 4(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: lshr_8bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a3, 5(a0) +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a3, 1(a1) +; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: addi a3, a1, -32 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: bltz a3, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl a0, a4, a3 +; RV32I-NEXT: j .LBB3_3 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: lbu a5, 1(a0) +; RV32I-NEXT: lbu a6, 0(a0) +; RV32I-NEXT: lbu a7, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: xori a5, a1, 31 +; RV32I-NEXT: slli a6, a4, 1 +; RV32I-NEXT: sll a5, a6, a5 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: srl a1, a4, a1 +; RV32I-NEXT: slti a3, a3, 0 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: sb a3, 7(a2) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 5(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: ret + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = lshr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: shl_8bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 1(a1) +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 2(a1) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: lbu a5, 4(a1) +; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 6(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 4(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: shl_8bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a3, 1(a1) +; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: addi a3, a1, -32 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: bltz a3, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll a0, a4, a3 +; RV32I-NEXT: j .LBB4_3 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: lbu a5, 5(a0) +; RV32I-NEXT: lbu a6, 4(a0) +; RV32I-NEXT: lbu a7, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: xori a5, a1, 31 +; RV32I-NEXT: srli a6, a4, 1 +; RV32I-NEXT: srl a5, a6, a5 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: .LBB4_3: +; RV32I-NEXT: sll a1, a4, a1 +; RV32I-NEXT: slti a3, a3, 0 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: sb a3, 7(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: sb a0, 2(a2) +; RV32I-NEXT: srli a0, a1, 24 +; RV32I-NEXT: sb a0, 3(a2) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: ret + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = shl i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: ashr_8bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 1(a1) +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 2(a1) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: lbu a5, 4(a1) +; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: sra a0, a0, a1 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 6(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 4(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: ashr_8bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: lbu a3, 5(a0) +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: or a6, a3, a4 +; RV32I-NEXT: lbu a3, 6(a0) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: lbu a4, 7(a0) +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: slli a4, a4, 24 +; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: or a3, a1, a5 +; RV32I-NEXT: addi a5, a3, -32 +; RV32I-NEXT: or a1, a7, a6 +; RV32I-NEXT: bltz a5, .LBB5_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sra a0, a1, a5 +; RV32I-NEXT: srai a1, a4, 31 +; RV32I-NEXT: j .LBB5_3 +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srl a0, a0, a3 +; RV32I-NEXT: xori a4, a3, 31 +; RV32I-NEXT: slli a5, a1, 1 +; RV32I-NEXT: sll a4, a5, a4 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: sra a1, a1, a3 +; RV32I-NEXT: .LBB5_3: +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: sb a3, 7(a2) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 5(a2) +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: ret + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = ashr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: lshr_16bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 9(a0) +; RV64I-NEXT: lbu a4, 8(a0) +; RV64I-NEXT: lbu a5, 10(a0) +; RV64I-NEXT: lbu a6, 11(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: lbu a3, 13(a0) +; RV64I-NEXT: lbu a5, 12(a0) +; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a7, 15(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu t0, 3(a1) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a3, 32 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: addi a3, a1, -64 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: bltz a3, .LBB6_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: srl a0, a4, a3 +; RV64I-NEXT: j .LBB6_3 +; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: lbu a5, 1(a0) +; RV64I-NEXT: lbu a6, 0(a0) +; RV64I-NEXT: lbu a7, 2(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: lbu t0, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: xori a5, a1, 63 +; RV64I-NEXT: slli a6, a4, 1 +; RV64I-NEXT: sll a5, a6, a5 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: .LBB6_3: +; RV64I-NEXT: srl a1, a4, a1 +; RV64I-NEXT: slti a3, a3, 0 +; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: sb a3, 15(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: srli a3, a1, 40 +; RV64I-NEXT: sb a3, 13(a2) +; RV64I-NEXT: srli a3, a1, 32 +; RV64I-NEXT: sb a3, 12(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: sb a3, 11(a2) +; RV64I-NEXT: srli a3, a1, 16 +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 9(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 6(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 4(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: lshr_16bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 5(a0) +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a7, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or t0, a6, a5 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or t3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or t4, a6, a5 +; RV32I-NEXT: lbu a3, 13(a0) +; RV32I-NEXT: lbu a4, 12(a0) +; RV32I-NEXT: lbu a5, 14(a0) +; RV32I-NEXT: lbu a6, 15(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 9(a0) +; RV32I-NEXT: lbu a5, 8(a0) +; RV32I-NEXT: lbu a6, 10(a0) +; RV32I-NEXT: lbu a0, 11(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a5, a0, a6 +; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: lbu a0, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a4, a1, a6 +; RV32I-NEXT: or a4, a4, a0 +; RV32I-NEXT: addi t1, a4, -64 +; RV32I-NEXT: addi t2, a4, -96 +; RV32I-NEXT: slli a6, a3, 1 +; RV32I-NEXT: bltz t2, .LBB6_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl t5, a3, t2 +; RV32I-NEXT: j .LBB6_3 +; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: srl a0, a5, t1 +; RV32I-NEXT: xori a1, t1, 31 +; RV32I-NEXT: sll a1, a6, a1 +; RV32I-NEXT: or t5, a0, a1 +; RV32I-NEXT: .LBB6_3: +; RV32I-NEXT: or a0, t0, a7 +; RV32I-NEXT: or a1, t4, t3 +; RV32I-NEXT: addi t0, a4, -32 +; RV32I-NEXT: xori a7, a4, 31 +; RV32I-NEXT: bltz t0, .LBB6_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: srl s1, a0, t0 +; RV32I-NEXT: j .LBB6_6 +; RV32I-NEXT: .LBB6_5: +; RV32I-NEXT: srl t3, a1, a4 +; RV32I-NEXT: slli t4, a0, 1 +; RV32I-NEXT: sll t4, t4, a7 +; RV32I-NEXT: or s1, t3, t4 +; RV32I-NEXT: .LBB6_6: +; RV32I-NEXT: neg t3, a4 +; RV32I-NEXT: sll t4, a5, t3 +; RV32I-NEXT: li s0, 32 +; RV32I-NEXT: li t6, 64 +; RV32I-NEXT: sub s0, s0, a4 +; RV32I-NEXT: bltu a4, t6, .LBB6_12 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: bnez a4, .LBB6_13 +; RV32I-NEXT: .LBB6_8: +; RV32I-NEXT: bgez s0, .LBB6_10 +; RV32I-NEXT: .LBB6_9: +; RV32I-NEXT: sll t3, a3, t3 +; RV32I-NEXT: srli t4, a5, 1 +; RV32I-NEXT: sub t5, t6, a4 +; RV32I-NEXT: xori t5, t5, 31 +; RV32I-NEXT: srl t4, t4, t5 +; RV32I-NEXT: or t4, t3, t4 +; RV32I-NEXT: .LBB6_10: +; RV32I-NEXT: slti t3, t0, 0 +; RV32I-NEXT: neg t3, t3 +; RV32I-NEXT: bltu a4, t6, .LBB6_14 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: srl t1, a3, t1 +; RV32I-NEXT: slti t2, t2, 0 +; RV32I-NEXT: neg t2, t2 +; RV32I-NEXT: and t1, t2, t1 +; RV32I-NEXT: bnez a4, .LBB6_15 +; RV32I-NEXT: j .LBB6_16 +; RV32I-NEXT: .LBB6_12: +; RV32I-NEXT: slti t5, s0, 0 +; RV32I-NEXT: neg t5, t5 +; RV32I-NEXT: and t5, t5, t4 +; RV32I-NEXT: or t5, s1, t5 +; RV32I-NEXT: beqz a4, .LBB6_8 +; RV32I-NEXT: .LBB6_13: +; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: bltz s0, .LBB6_9 +; RV32I-NEXT: j .LBB6_10 +; RV32I-NEXT: .LBB6_14: +; RV32I-NEXT: srl t1, a0, a4 +; RV32I-NEXT: and t1, t3, t1 +; RV32I-NEXT: or t1, t1, t4 +; RV32I-NEXT: beqz a4, .LBB6_16 +; RV32I-NEXT: .LBB6_15: +; RV32I-NEXT: mv a0, t1 +; RV32I-NEXT: .LBB6_16: +; RV32I-NEXT: bltz t0, .LBB6_18 +; RV32I-NEXT: # %bb.17: +; RV32I-NEXT: srl a5, a3, t0 +; RV32I-NEXT: j .LBB6_19 +; RV32I-NEXT: .LBB6_18: +; RV32I-NEXT: srl a5, a5, a4 +; RV32I-NEXT: sll a6, a6, a7 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: .LBB6_19: +; RV32I-NEXT: sltiu a6, a4, 64 +; RV32I-NEXT: neg a6, a6 +; RV32I-NEXT: and a5, a6, a5 +; RV32I-NEXT: srl a3, a3, a4 +; RV32I-NEXT: and a3, t3, a3 +; RV32I-NEXT: and a3, a6, a3 +; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: srli a4, a5, 16 +; RV32I-NEXT: sb a4, 10(a2) +; RV32I-NEXT: srli a4, a5, 24 +; RV32I-NEXT: sb a4, 11(a2) +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: srli a4, a3, 16 +; RV32I-NEXT: sb a4, 14(a2) +; RV32I-NEXT: srli a4, a3, 24 +; RV32I-NEXT: sb a4, 15(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 6(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = lshr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: shl_16bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: lbu a3, 5(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 7(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu t0, 3(a1) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a3, 32 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: addi a3, a1, -64 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: bltz a3, .LBB7_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: sll a0, a4, a3 +; RV64I-NEXT: j .LBB7_3 +; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: lbu a5, 9(a0) +; RV64I-NEXT: lbu a6, 8(a0) +; RV64I-NEXT: lbu a7, 10(a0) +; RV64I-NEXT: lbu t0, 11(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: lbu a7, 12(a0) +; RV64I-NEXT: lbu t0, 14(a0) +; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: xori a5, a1, 63 +; RV64I-NEXT: srli a6, a4, 1 +; RV64I-NEXT: srl a5, a6, a5 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: .LBB7_3: +; RV64I-NEXT: sll a1, a4, a1 +; RV64I-NEXT: slti a3, a3, 0 +; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: sb a3, 15(a2) +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: srli a3, a0, 40 +; RV64I-NEXT: sb a3, 13(a2) +; RV64I-NEXT: srli a3, a0, 32 +; RV64I-NEXT: sb a3, 12(a2) +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: sb a3, 11(a2) +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 9(a2) +; RV64I-NEXT: srli a0, a1, 56 +; RV64I-NEXT: sb a0, 7(a2) +; RV64I-NEXT: srli a0, a1, 48 +; RV64I-NEXT: sb a0, 6(a2) +; RV64I-NEXT: srli a0, a1, 40 +; RV64I-NEXT: sb a0, 5(a2) +; RV64I-NEXT: srli a0, a1, 32 +; RV64I-NEXT: sb a0, 4(a2) +; RV64I-NEXT: srli a0, a1, 24 +; RV64I-NEXT: sb a0, 3(a2) +; RV64I-NEXT: srli a0, a1, 16 +; RV64I-NEXT: sb a0, 2(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: shl_16bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 9(a0) +; RV32I-NEXT: lbu a4, 8(a0) +; RV32I-NEXT: lbu a5, 10(a0) +; RV32I-NEXT: lbu a6, 11(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a7, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or t0, a6, a5 +; RV32I-NEXT: lbu a3, 13(a0) +; RV32I-NEXT: lbu a4, 12(a0) +; RV32I-NEXT: lbu a5, 14(a0) +; RV32I-NEXT: lbu a6, 15(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or t3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or t4, a6, a5 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a5, a0, a6 +; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: lbu a0, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a4, a1, a6 +; RV32I-NEXT: or a4, a4, a0 +; RV32I-NEXT: addi t1, a4, -64 +; RV32I-NEXT: addi t2, a4, -96 +; RV32I-NEXT: srli a6, a3, 1 +; RV32I-NEXT: bltz t2, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll t5, a3, t2 +; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: sll a0, a5, t1 +; RV32I-NEXT: xori a1, t1, 31 +; RV32I-NEXT: srl a1, a6, a1 +; RV32I-NEXT: or t5, a0, a1 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: or a0, t0, a7 +; RV32I-NEXT: or a1, t4, t3 +; RV32I-NEXT: addi t0, a4, -32 +; RV32I-NEXT: xori a7, a4, 31 +; RV32I-NEXT: bltz t0, .LBB7_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: sll s1, a0, t0 +; RV32I-NEXT: j .LBB7_6 +; RV32I-NEXT: .LBB7_5: +; RV32I-NEXT: sll t3, a1, a4 +; RV32I-NEXT: srli t4, a0, 1 +; RV32I-NEXT: srl t4, t4, a7 +; RV32I-NEXT: or s1, t3, t4 +; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: neg t3, a4 +; RV32I-NEXT: srl t4, a5, t3 +; RV32I-NEXT: li s0, 32 +; RV32I-NEXT: li t6, 64 +; RV32I-NEXT: sub s0, s0, a4 +; RV32I-NEXT: bltu a4, t6, .LBB7_12 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: bnez a4, .LBB7_13 +; RV32I-NEXT: .LBB7_8: +; RV32I-NEXT: bgez s0, .LBB7_10 +; RV32I-NEXT: .LBB7_9: +; RV32I-NEXT: srl t3, a3, t3 +; RV32I-NEXT: slli t4, a5, 1 +; RV32I-NEXT: sub t5, t6, a4 +; RV32I-NEXT: xori t5, t5, 31 +; RV32I-NEXT: sll t4, t4, t5 +; RV32I-NEXT: or t4, t3, t4 +; RV32I-NEXT: .LBB7_10: +; RV32I-NEXT: slti t3, t0, 0 +; RV32I-NEXT: neg t3, t3 +; RV32I-NEXT: bltu a4, t6, .LBB7_14 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sll t1, a3, t1 +; RV32I-NEXT: slti t2, t2, 0 +; RV32I-NEXT: neg t2, t2 +; RV32I-NEXT: and t1, t2, t1 +; RV32I-NEXT: bnez a4, .LBB7_15 +; RV32I-NEXT: j .LBB7_16 +; RV32I-NEXT: .LBB7_12: +; RV32I-NEXT: slti t5, s0, 0 +; RV32I-NEXT: neg t5, t5 +; RV32I-NEXT: and t5, t5, t4 +; RV32I-NEXT: or t5, s1, t5 +; RV32I-NEXT: beqz a4, .LBB7_8 +; RV32I-NEXT: .LBB7_13: +; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: bltz s0, .LBB7_9 +; RV32I-NEXT: j .LBB7_10 +; RV32I-NEXT: .LBB7_14: +; RV32I-NEXT: sll t1, a0, a4 +; RV32I-NEXT: and t1, t3, t1 +; RV32I-NEXT: or t1, t1, t4 +; RV32I-NEXT: beqz a4, .LBB7_16 +; RV32I-NEXT: .LBB7_15: +; RV32I-NEXT: mv a0, t1 +; RV32I-NEXT: .LBB7_16: +; RV32I-NEXT: bltz t0, .LBB7_18 +; RV32I-NEXT: # %bb.17: +; RV32I-NEXT: sll a5, a3, t0 +; RV32I-NEXT: j .LBB7_19 +; RV32I-NEXT: .LBB7_18: +; RV32I-NEXT: sll a5, a5, a4 +; RV32I-NEXT: srl a6, a6, a7 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: .LBB7_19: +; RV32I-NEXT: sltiu a6, a4, 64 +; RV32I-NEXT: neg a6, a6 +; RV32I-NEXT: and a5, a6, a5 +; RV32I-NEXT: sll a3, a3, a4 +; RV32I-NEXT: and a3, t3, a3 +; RV32I-NEXT: and a3, a6, a3 +; RV32I-NEXT: sb a3, 0(a2) +; RV32I-NEXT: sb a5, 4(a2) +; RV32I-NEXT: srli a4, a3, 16 +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: srli a4, a3, 24 +; RV32I-NEXT: sb a4, 3(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 1(a2) +; RV32I-NEXT: srli a3, a5, 16 +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: srli a3, a5, 24 +; RV32I-NEXT: sb a3, 7(a2) +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a5, 5(a2) +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a0, 8(a2) +; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: sb a3, 15(a2) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 11(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = shl i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: ashr_16bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: lbu a3, 9(a0) +; RV64I-NEXT: lbu a4, 8(a0) +; RV64I-NEXT: lbu a5, 10(a0) +; RV64I-NEXT: lbu a6, 11(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 13(a0) +; RV64I-NEXT: lbu a5, 12(a0) +; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a7, 15(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu t0, 3(a1) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a4, 32 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: addi a5, a1, -64 +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: bltz a5, .LBB8_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: sra a0, a3, a5 +; RV64I-NEXT: sraiw a1, a4, 31 +; RV64I-NEXT: j .LBB8_3 +; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: lbu a4, 1(a0) +; RV64I-NEXT: lbu a5, 0(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 3(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 5(a0) +; RV64I-NEXT: lbu a6, 4(a0) +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: xori a4, a1, 63 +; RV64I-NEXT: slli a5, a3, 1 +; RV64I-NEXT: sll a4, a5, a4 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: sra a1, a3, a1 +; RV64I-NEXT: .LBB8_3: +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: sb a3, 15(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: srli a3, a1, 40 +; RV64I-NEXT: sb a3, 13(a2) +; RV64I-NEXT: srli a3, a1, 32 +; RV64I-NEXT: sb a3, 12(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: sb a3, 11(a2) +; RV64I-NEXT: srli a3, a1, 16 +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 9(a2) +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 6(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 4(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: ret +; +; RV32I-LABEL: ashr_16bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 5(a0) +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a7, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or t1, a6, a5 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or t2, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or t5, a6, a5 +; RV32I-NEXT: lbu a3, 13(a0) +; RV32I-NEXT: lbu a4, 12(a0) +; RV32I-NEXT: lbu a5, 14(a0) +; RV32I-NEXT: lbu t0, 15(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a4, t0, a5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 9(a0) +; RV32I-NEXT: lbu a5, 8(a0) +; RV32I-NEXT: lbu a6, 10(a0) +; RV32I-NEXT: lbu a0, 11(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a5, a0, a6 +; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: lbu a0, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a4, a1, a6 +; RV32I-NEXT: or a4, a4, a0 +; RV32I-NEXT: addi t3, a4, -64 +; RV32I-NEXT: addi t4, a4, -96 +; RV32I-NEXT: slli a6, a3, 1 +; RV32I-NEXT: bltz t4, .LBB8_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sra t6, a3, t4 +; RV32I-NEXT: j .LBB8_3 +; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: srl a0, a5, t3 +; RV32I-NEXT: xori a1, t3, 31 +; RV32I-NEXT: sll a1, a6, a1 +; RV32I-NEXT: or t6, a0, a1 +; RV32I-NEXT: .LBB8_3: +; RV32I-NEXT: or a0, t1, a7 +; RV32I-NEXT: or a1, t5, t2 +; RV32I-NEXT: addi a7, a4, -32 +; RV32I-NEXT: xori t2, a4, 31 +; RV32I-NEXT: bltz a7, .LBB8_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: srl s2, a0, a7 +; RV32I-NEXT: j .LBB8_6 +; RV32I-NEXT: .LBB8_5: +; RV32I-NEXT: srl t1, a1, a4 +; RV32I-NEXT: slli t5, a0, 1 +; RV32I-NEXT: sll t5, t5, t2 +; RV32I-NEXT: or s2, t1, t5 +; RV32I-NEXT: .LBB8_6: +; RV32I-NEXT: neg s0, a4 +; RV32I-NEXT: sll t5, a5, s0 +; RV32I-NEXT: li s1, 32 +; RV32I-NEXT: li t1, 64 +; RV32I-NEXT: sub s1, s1, a4 +; RV32I-NEXT: bltu a4, t1, .LBB8_11 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: bnez a4, .LBB8_12 +; RV32I-NEXT: .LBB8_8: +; RV32I-NEXT: bltz s1, .LBB8_13 +; RV32I-NEXT: .LBB8_9: +; RV32I-NEXT: srai t0, t0, 31 +; RV32I-NEXT: bltz t4, .LBB8_14 +; RV32I-NEXT: .LBB8_10: +; RV32I-NEXT: mv t3, t0 +; RV32I-NEXT: bltu a4, t1, .LBB8_15 +; RV32I-NEXT: j .LBB8_16 +; RV32I-NEXT: .LBB8_11: +; RV32I-NEXT: slti t6, s1, 0 +; RV32I-NEXT: neg t6, t6 +; RV32I-NEXT: and t6, t6, t5 +; RV32I-NEXT: or t6, s2, t6 +; RV32I-NEXT: beqz a4, .LBB8_8 +; RV32I-NEXT: .LBB8_12: +; RV32I-NEXT: mv a1, t6 +; RV32I-NEXT: bgez s1, .LBB8_9 +; RV32I-NEXT: .LBB8_13: +; RV32I-NEXT: sll t5, a3, s0 +; RV32I-NEXT: srli t6, a5, 1 +; RV32I-NEXT: sub s0, t1, a4 +; RV32I-NEXT: xori s0, s0, 31 +; RV32I-NEXT: srl t6, t6, s0 +; RV32I-NEXT: or t5, t5, t6 +; RV32I-NEXT: srai t0, t0, 31 +; RV32I-NEXT: bgez t4, .LBB8_10 +; RV32I-NEXT: .LBB8_14: +; RV32I-NEXT: sra t3, a3, t3 +; RV32I-NEXT: bgeu a4, t1, .LBB8_16 +; RV32I-NEXT: .LBB8_15: +; RV32I-NEXT: slti t3, a7, 0 +; RV32I-NEXT: srl t4, a0, a4 +; RV32I-NEXT: neg t3, t3 +; RV32I-NEXT: and t3, t3, t4 +; RV32I-NEXT: or t3, t3, t5 +; RV32I-NEXT: .LBB8_16: +; RV32I-NEXT: bnez a4, .LBB8_19 +; RV32I-NEXT: # %bb.17: +; RV32I-NEXT: bltz a7, .LBB8_20 +; RV32I-NEXT: .LBB8_18: +; RV32I-NEXT: sra a5, a3, a7 +; RV32I-NEXT: bgeu a4, t1, .LBB8_21 +; RV32I-NEXT: j .LBB8_22 +; RV32I-NEXT: .LBB8_19: +; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: bgez a7, .LBB8_18 +; RV32I-NEXT: .LBB8_20: +; RV32I-NEXT: srl a5, a5, a4 +; RV32I-NEXT: sll a6, a6, t2 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: bltu a4, t1, .LBB8_22 +; RV32I-NEXT: .LBB8_21: +; RV32I-NEXT: mv a5, t0 +; RV32I-NEXT: .LBB8_22: +; RV32I-NEXT: bltz a7, .LBB8_24 +; RV32I-NEXT: # %bb.23: +; RV32I-NEXT: mv a3, t0 +; RV32I-NEXT: bgeu a4, t1, .LBB8_25 +; RV32I-NEXT: j .LBB8_26 +; RV32I-NEXT: .LBB8_24: +; RV32I-NEXT: sra a3, a3, a4 +; RV32I-NEXT: bltu a4, t1, .LBB8_26 +; RV32I-NEXT: .LBB8_25: +; RV32I-NEXT: mv a3, t0 +; RV32I-NEXT: .LBB8_26: +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: srli a4, a3, 16 +; RV32I-NEXT: sb a4, 14(a2) +; RV32I-NEXT: srli a4, a3, 24 +; RV32I-NEXT: sb a4, 15(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: srli a3, a5, 16 +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: srli a3, a5, 24 +; RV32I-NEXT: sb a3, 11(a2) +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 6(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = ashr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: lshr_32bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 9(a0) +; RV64I-NEXT: lbu a4, 8(a0) +; RV64I-NEXT: lbu a5, 10(a0) +; RV64I-NEXT: lbu a6, 11(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: lbu a3, 13(a0) +; RV64I-NEXT: lbu a4, 12(a0) +; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a7, 15(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a6, a4, a3 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a7, 2(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a4, t0, a7 +; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: lbu a3, 5(a0) +; RV64I-NEXT: lbu a4, 4(a0) +; RV64I-NEXT: lbu t0, 6(a0) +; RV64I-NEXT: lbu t1, 7(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a4, t1, t0 +; RV64I-NEXT: or t0, a4, a3 +; RV64I-NEXT: lbu a3, 25(a0) +; RV64I-NEXT: lbu a4, 24(a0) +; RV64I-NEXT: lbu t1, 26(a0) +; RV64I-NEXT: lbu t2, 27(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or a4, t2, t1 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 29(a0) +; RV64I-NEXT: lbu t1, 28(a0) +; RV64I-NEXT: lbu t2, 30(a0) +; RV64I-NEXT: lbu t3, 31(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 17(a0) +; RV64I-NEXT: lbu t1, 16(a0) +; RV64I-NEXT: lbu t2, 18(a0) +; RV64I-NEXT: lbu t3, 19(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: lbu t1, 21(a0) +; RV64I-NEXT: lbu t2, 20(a0) +; RV64I-NEXT: lbu t3, 22(a0) +; RV64I-NEXT: lbu a0, 23(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t1, t1, t2 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t3 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a4, a0, a4 +; RV64I-NEXT: lbu a0, 1(a1) +; RV64I-NEXT: lbu t1, 0(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: or t2, t3, t2 +; RV64I-NEXT: or t2, t2, a0 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t1, t1, t4 +; RV64I-NEXT: lbu t3, 6(a1) +; RV64I-NEXT: lbu t4, 7(a1) +; RV64I-NEXT: slli a0, a6, 32 +; RV64I-NEXT: slli a1, t0, 32 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or a6, t4, t3 +; RV64I-NEXT: or a6, a6, t1 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a6, a6, t2 +; RV64I-NEXT: addi t1, a6, -128 +; RV64I-NEXT: addi t2, a6, -192 +; RV64I-NEXT: slli t0, a3, 1 +; RV64I-NEXT: bltz t2, .LBB9_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: srl t3, a3, t2 +; RV64I-NEXT: j .LBB9_3 +; RV64I-NEXT: .LBB9_2: +; RV64I-NEXT: srl t3, a4, t1 +; RV64I-NEXT: xori t4, t1, 63 +; RV64I-NEXT: sll t4, t0, t4 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: .LBB9_3: +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: addi a7, a6, -64 +; RV64I-NEXT: xori a5, a6, 63 +; RV64I-NEXT: bltz a7, .LBB9_5 +; RV64I-NEXT: # %bb.4: +; RV64I-NEXT: srl s1, a0, a7 +; RV64I-NEXT: j .LBB9_6 +; RV64I-NEXT: .LBB9_5: +; RV64I-NEXT: srl t4, a1, a6 +; RV64I-NEXT: slli t5, a0, 1 +; RV64I-NEXT: sll t5, t5, a5 +; RV64I-NEXT: or s1, t4, t5 +; RV64I-NEXT: .LBB9_6: +; RV64I-NEXT: negw t6, a6 +; RV64I-NEXT: sll t4, a4, t6 +; RV64I-NEXT: li s0, 64 +; RV64I-NEXT: li t5, 128 +; RV64I-NEXT: sub s0, s0, a6 +; RV64I-NEXT: bltu a6, t5, .LBB9_12 +; RV64I-NEXT: # %bb.7: +; RV64I-NEXT: bnez a6, .LBB9_13 +; RV64I-NEXT: .LBB9_8: +; RV64I-NEXT: bgez s0, .LBB9_10 +; RV64I-NEXT: .LBB9_9: +; RV64I-NEXT: sll t3, a3, t6 +; RV64I-NEXT: srli t4, a4, 1 +; RV64I-NEXT: sub t6, t5, a6 +; RV64I-NEXT: xori t6, t6, 63 +; RV64I-NEXT: srl t4, t4, t6 +; RV64I-NEXT: or t4, t3, t4 +; RV64I-NEXT: .LBB9_10: +; RV64I-NEXT: slti t3, a7, 0 +; RV64I-NEXT: neg t3, t3 +; RV64I-NEXT: bltu a6, t5, .LBB9_14 +; RV64I-NEXT: # %bb.11: +; RV64I-NEXT: srl t1, a3, t1 +; RV64I-NEXT: slti t2, t2, 0 +; RV64I-NEXT: neg t2, t2 +; RV64I-NEXT: and t1, t2, t1 +; RV64I-NEXT: bnez a6, .LBB9_15 +; RV64I-NEXT: j .LBB9_16 +; RV64I-NEXT: .LBB9_12: +; RV64I-NEXT: slti t3, s0, 0 +; RV64I-NEXT: neg t3, t3 +; RV64I-NEXT: and t3, t3, t4 +; RV64I-NEXT: or t3, s1, t3 +; RV64I-NEXT: beqz a6, .LBB9_8 +; RV64I-NEXT: .LBB9_13: +; RV64I-NEXT: mv a1, t3 +; RV64I-NEXT: bltz s0, .LBB9_9 +; RV64I-NEXT: j .LBB9_10 +; RV64I-NEXT: .LBB9_14: +; RV64I-NEXT: srl t1, a0, a6 +; RV64I-NEXT: and t1, t3, t1 +; RV64I-NEXT: or t1, t1, t4 +; RV64I-NEXT: beqz a6, .LBB9_16 +; RV64I-NEXT: .LBB9_15: +; RV64I-NEXT: mv a0, t1 +; RV64I-NEXT: .LBB9_16: +; RV64I-NEXT: bltz a7, .LBB9_18 +; RV64I-NEXT: # %bb.17: +; RV64I-NEXT: srl a4, a3, a7 +; RV64I-NEXT: j .LBB9_19 +; RV64I-NEXT: .LBB9_18: +; RV64I-NEXT: srl a4, a4, a6 +; RV64I-NEXT: sll a5, t0, a5 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: .LBB9_19: +; RV64I-NEXT: sltiu a5, a6, 128 +; RV64I-NEXT: neg a5, a5 +; RV64I-NEXT: and a4, a5, a4 +; RV64I-NEXT: srl a3, a3, a6 +; RV64I-NEXT: and a3, t3, a3 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a3, 24(a2) +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: sb a5, 23(a2) +; RV64I-NEXT: srli a5, a4, 48 +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: srli a5, a4, 40 +; RV64I-NEXT: sb a5, 21(a2) +; RV64I-NEXT: srli a5, a4, 32 +; RV64I-NEXT: sb a5, 20(a2) +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: sb a5, 19(a2) +; RV64I-NEXT: srli a5, a4, 16 +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: sb a4, 31(a2) +; RV64I-NEXT: srli a4, a3, 48 +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: srli a4, a3, 40 +; RV64I-NEXT: sb a4, 29(a2) +; RV64I-NEXT: srli a4, a3, 32 +; RV64I-NEXT: sb a4, 28(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: sb a4, 27(a2) +; RV64I-NEXT: srli a4, a3, 16 +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a3, 25(a2) +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: sb a3, 6(a2) +; RV64I-NEXT: srli a3, a1, 40 +; RV64I-NEXT: sb a3, 5(a2) +; RV64I-NEXT: srli a3, a1, 32 +; RV64I-NEXT: sb a3, 4(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: sb a3, 3(a2) +; RV64I-NEXT: srli a3, a1, 16 +; RV64I-NEXT: sb a3, 2(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 15(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 14(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 13(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 11(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 10(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 9(a2) +; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32I-LABEL: lshr_32bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -128 +; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t4, 7(a0) +; RV32I-NEXT: lbu t1, 0(a0) +; RV32I-NEXT: lbu t5, 1(a0) +; RV32I-NEXT: lbu t6, 2(a0) +; RV32I-NEXT: lbu s0, 3(a0) +; RV32I-NEXT: lbu t3, 12(a0) +; RV32I-NEXT: lbu a7, 13(a0) +; RV32I-NEXT: lbu s1, 14(a0) +; RV32I-NEXT: lbu s6, 15(a0) +; RV32I-NEXT: lbu s2, 8(a0) +; RV32I-NEXT: lbu s3, 9(a0) +; RV32I-NEXT: lbu s4, 10(a0) +; RV32I-NEXT: lbu s5, 11(a0) +; RV32I-NEXT: lbu a3, 21(a0) +; RV32I-NEXT: lbu a4, 20(a0) +; RV32I-NEXT: lbu a5, 22(a0) +; RV32I-NEXT: lbu s7, 23(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli s7, s7, 24 +; RV32I-NEXT: or a4, s7, a5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 17(a0) +; RV32I-NEXT: lbu a5, 16(a0) +; RV32I-NEXT: lbu s8, 18(a0) +; RV32I-NEXT: lbu s9, 19(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or s7, a4, a5 +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: slli s9, s9, 24 +; RV32I-NEXT: or s9, s9, s8 +; RV32I-NEXT: lbu a4, 29(a0) +; RV32I-NEXT: lbu a5, 28(a0) +; RV32I-NEXT: lbu s8, 30(a0) +; RV32I-NEXT: lbu s10, 31(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: slli s10, s10, 24 +; RV32I-NEXT: or a5, s10, s8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 25(a0) +; RV32I-NEXT: lbu s8, 24(a0) +; RV32I-NEXT: lbu s10, 26(a0) +; RV32I-NEXT: lbu a0, 27(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, s8 +; RV32I-NEXT: slli s10, s10, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, s10 +; RV32I-NEXT: or ra, a0, a5 +; RV32I-NEXT: lbu a0, 1(a1) +; RV32I-NEXT: lbu a5, 0(a1) +; RV32I-NEXT: lbu s8, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, s8 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: addi a5, a0, -192 +; RV32I-NEXT: addi a1, a0, -224 +; RV32I-NEXT: slli s8, a4, 1 +; RV32I-NEXT: sw s8, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a5, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz a1, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl s8, a4, a1 +; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: srl a1, ra, a5 +; RV32I-NEXT: xori a5, a5, 31 +; RV32I-NEXT: sll a5, s8, a5 +; RV32I-NEXT: or s8, a1, a5 +; RV32I-NEXT: .LBB9_3: +; RV32I-NEXT: slli a5, a7, 8 +; RV32I-NEXT: slli s10, s1, 16 +; RV32I-NEXT: slli s6, s6, 24 +; RV32I-NEXT: or a7, s9, s7 +; RV32I-NEXT: addi s1, a0, -128 +; RV32I-NEXT: slli a1, a3, 1 +; RV32I-NEXT: addi s9, a0, -160 +; RV32I-NEXT: xori s11, s1, 31 +; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s9, .LBB9_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: srl s7, a3, s9 +; RV32I-NEXT: j .LBB9_6 +; RV32I-NEXT: .LBB9_5: +; RV32I-NEXT: srl s7, a7, s1 +; RV32I-NEXT: sll s11, a1, s11 +; RV32I-NEXT: or s7, s7, s11 +; RV32I-NEXT: .LBB9_6: +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: slli s5, s5, 24 +; RV32I-NEXT: or a5, a5, t3 +; RV32I-NEXT: or s6, s6, s10 +; RV32I-NEXT: neg s11, a0 +; RV32I-NEXT: sll s10, ra, s11 +; RV32I-NEXT: li t3, 160 +; RV32I-NEXT: li a1, 64 +; RV32I-NEXT: sub t3, t3, a0 +; RV32I-NEXT: sw s10, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t3, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgeu s1, a1, .LBB9_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: slti t3, t3, 0 +; RV32I-NEXT: neg t3, t3 +; RV32I-NEXT: and t3, t3, s10 +; RV32I-NEXT: or s8, s7, t3 +; RV32I-NEXT: .LBB9_8: +; RV32I-NEXT: slli s10, a6, 8 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t4, t4, 24 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: slli s0, s0, 24 +; RV32I-NEXT: or s2, s3, s2 +; RV32I-NEXT: or s3, s5, s4 +; RV32I-NEXT: or a6, s6, a5 +; RV32I-NEXT: mv s7, a7 +; RV32I-NEXT: beqz s1, .LBB9_10 +; RV32I-NEXT: # %bb.9: +; RV32I-NEXT: mv s7, s8 +; RV32I-NEXT: .LBB9_10: +; RV32I-NEXT: or t0, s10, t0 +; RV32I-NEXT: or t2, t4, t2 +; RV32I-NEXT: or t1, t5, t1 +; RV32I-NEXT: or t4, s0, t6 +; RV32I-NEXT: or s5, s3, s2 +; RV32I-NEXT: addi a1, a0, -64 +; RV32I-NEXT: slli t5, a6, 1 +; RV32I-NEXT: addi s4, a0, -96 +; RV32I-NEXT: xori t3, a1, 31 +; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t3, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t5, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s4, .LBB9_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: srl a5, a6, s4 +; RV32I-NEXT: j .LBB9_13 +; RV32I-NEXT: .LBB9_12: +; RV32I-NEXT: srl a5, s5, a1 +; RV32I-NEXT: sll t3, t5, t3 +; RV32I-NEXT: or a5, a5, t3 +; RV32I-NEXT: .LBB9_13: +; RV32I-NEXT: li t5, 64 +; RV32I-NEXT: or s3, t2, t0 +; RV32I-NEXT: or t1, t4, t1 +; RV32I-NEXT: addi t6, a0, -32 +; RV32I-NEXT: xori s10, a0, 31 +; RV32I-NEXT: bltz t6, .LBB9_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: srl t4, s3, t6 +; RV32I-NEXT: j .LBB9_16 +; RV32I-NEXT: .LBB9_15: +; RV32I-NEXT: srl t0, t1, a0 +; RV32I-NEXT: slli t2, s3, 1 +; RV32I-NEXT: sll t2, t2, s10 +; RV32I-NEXT: or t4, t0, t2 +; RV32I-NEXT: .LBB9_16: +; RV32I-NEXT: sll t2, s5, s11 +; RV32I-NEXT: li t0, 32 +; RV32I-NEXT: sub s0, t0, a0 +; RV32I-NEXT: slti t3, s0, 0 +; RV32I-NEXT: neg a1, t3 +; RV32I-NEXT: bgeu a0, t5, .LBB9_18 +; RV32I-NEXT: # %bb.17: +; RV32I-NEXT: and a5, a1, t2 +; RV32I-NEXT: or a5, t4, a5 +; RV32I-NEXT: .LBB9_18: +; RV32I-NEXT: mv s8, t1 +; RV32I-NEXT: beqz a0, .LBB9_20 +; RV32I-NEXT: # %bb.19: +; RV32I-NEXT: mv s8, a5 +; RV32I-NEXT: .LBB9_20: +; RV32I-NEXT: sll a5, a7, s11 +; RV32I-NEXT: li t3, 96 +; RV32I-NEXT: sub s6, t3, a0 +; RV32I-NEXT: slti t3, s6, 0 +; RV32I-NEXT: neg t4, t3 +; RV32I-NEXT: li s2, 128 +; RV32I-NEXT: sub t5, s2, a0 +; RV32I-NEXT: sltiu t3, t5, 64 +; RV32I-NEXT: neg t3, t3 +; RV32I-NEXT: sw t3, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgeu a0, s2, .LBB9_22 +; RV32I-NEXT: # %bb.21: +; RV32I-NEXT: mv s2, t3 +; RV32I-NEXT: and t3, t4, a5 +; RV32I-NEXT: and t3, s2, t3 +; RV32I-NEXT: or s7, s8, t3 +; RV32I-NEXT: .LBB9_22: +; RV32I-NEXT: li s8, 64 +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a0, .LBB9_24 +; RV32I-NEXT: # %bb.23: +; RV32I-NEXT: mv t1, s7 +; RV32I-NEXT: .LBB9_24: +; RV32I-NEXT: neg t3, t5 +; RV32I-NEXT: sub s0, t0, t5 +; RV32I-NEXT: srl t0, a3, t3 +; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgez s0, .LBB9_26 +; RV32I-NEXT: # %bb.25: +; RV32I-NEXT: srl t0, a7, t3 +; RV32I-NEXT: sub t3, s8, t5 +; RV32I-NEXT: xori t3, t3, 31 +; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t3, a1, t3 +; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t0, t0, t3 +; RV32I-NEXT: .LBB9_26: +; RV32I-NEXT: bltu t5, s8, .LBB9_28 +; RV32I-NEXT: # %bb.27: +; RV32I-NEXT: and t3, a1, a5 +; RV32I-NEXT: mv t0, ra +; RV32I-NEXT: bnez t5, .LBB9_29 +; RV32I-NEXT: j .LBB9_30 +; RV32I-NEXT: .LBB9_28: +; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: and t3, t4, t3 +; RV32I-NEXT: or t3, t3, t0 +; RV32I-NEXT: mv t0, ra +; RV32I-NEXT: beqz t5, .LBB9_30 +; RV32I-NEXT: .LBB9_29: +; RV32I-NEXT: mv t0, t3 +; RV32I-NEXT: .LBB9_30: +; RV32I-NEXT: bltz t6, .LBB9_32 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: srl t4, a6, t6 +; RV32I-NEXT: j .LBB9_33 +; RV32I-NEXT: .LBB9_32: +; RV32I-NEXT: srl t3, s5, a0 +; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t4, a1, s10 +; RV32I-NEXT: or t4, t3, t4 +; RV32I-NEXT: .LBB9_33: +; RV32I-NEXT: sltiu s0, a0, 64 +; RV32I-NEXT: sw s10, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s9, .LBB9_35 +; RV32I-NEXT: # %bb.34: +; RV32I-NEXT: srl a1, a4, s9 +; RV32I-NEXT: j .LBB9_36 +; RV32I-NEXT: .LBB9_35: +; RV32I-NEXT: srl t3, ra, s1 +; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a1, s7, a1 +; RV32I-NEXT: or a1, t3, a1 +; RV32I-NEXT: .LBB9_36: +; RV32I-NEXT: neg s10, s0 +; RV32I-NEXT: sltiu t3, s1, 64 +; RV32I-NEXT: neg s0, t3 +; RV32I-NEXT: li t3, 128 +; RV32I-NEXT: sw ra, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltu a0, t3, .LBB9_38 +; RV32I-NEXT: # %bb.37: +; RV32I-NEXT: and a1, s0, a1 +; RV32I-NEXT: j .LBB9_39 +; RV32I-NEXT: .LBB9_38: +; RV32I-NEXT: and a1, s10, t4 +; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: .LBB9_39: +; RV32I-NEXT: lw t3, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: mv ra, s5 +; RV32I-NEXT: beqz a0, .LBB9_41 +; RV32I-NEXT: # %bb.40: +; RV32I-NEXT: mv ra, a1 +; RV32I-NEXT: .LBB9_41: +; RV32I-NEXT: sub a1, s8, a0 +; RV32I-NEXT: xori t4, a1, 31 +; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgez a1, .LBB9_43 +; RV32I-NEXT: # %bb.42: +; RV32I-NEXT: sll a1, a6, s11 +; RV32I-NEXT: srli t0, s5, 1 +; RV32I-NEXT: srl t0, t0, t4 +; RV32I-NEXT: or t2, a1, t0 +; RV32I-NEXT: .LBB9_43: +; RV32I-NEXT: slti a1, t6, 0 +; RV32I-NEXT: neg s2, a1 +; RV32I-NEXT: slti t0, s4, 0 +; RV32I-NEXT: neg s0, t0 +; RV32I-NEXT: bltu a0, s8, .LBB9_45 +; RV32I-NEXT: # %bb.44: +; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl t0, a6, a1 +; RV32I-NEXT: and t2, s0, t0 +; RV32I-NEXT: j .LBB9_46 +; RV32I-NEXT: .LBB9_45: +; RV32I-NEXT: srl t0, s3, a0 +; RV32I-NEXT: and t0, s2, t0 +; RV32I-NEXT: or t2, t0, t2 +; RV32I-NEXT: .LBB9_46: +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 64 +; RV32I-NEXT: mv t0, s3 +; RV32I-NEXT: beqz a0, .LBB9_48 +; RV32I-NEXT: # %bb.47: +; RV32I-NEXT: mv t0, t2 +; RV32I-NEXT: .LBB9_48: +; RV32I-NEXT: sll s7, a3, s11 +; RV32I-NEXT: srli s8, a7, 1 +; RV32I-NEXT: xori s0, t5, 31 +; RV32I-NEXT: bltz s6, .LBB9_50 +; RV32I-NEXT: # %bb.49: +; RV32I-NEXT: mv t4, a5 +; RV32I-NEXT: j .LBB9_51 +; RV32I-NEXT: .LBB9_50: +; RV32I-NEXT: srl t2, s8, s0 +; RV32I-NEXT: or t4, s7, t2 +; RV32I-NEXT: .LBB9_51: +; RV32I-NEXT: sll s5, a4, s11 +; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: srli s11, t2, 1 +; RV32I-NEXT: bltz t3, .LBB9_53 +; RV32I-NEXT: # %bb.52: +; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: j .LBB9_54 +; RV32I-NEXT: .LBB9_53: +; RV32I-NEXT: li t2, 192 +; RV32I-NEXT: sub t2, t2, a0 +; RV32I-NEXT: xori t2, t2, 31 +; RV32I-NEXT: srl t2, s11, t2 +; RV32I-NEXT: or t3, s5, t2 +; RV32I-NEXT: .LBB9_54: +; RV32I-NEXT: slti t2, s9, 0 +; RV32I-NEXT: neg t2, t2 +; RV32I-NEXT: bltu s1, a1, .LBB9_56 +; RV32I-NEXT: # %bb.55: +; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl t3, a4, a1 +; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: slti s9, a1, 0 +; RV32I-NEXT: neg s9, s9 +; RV32I-NEXT: and t3, s9, t3 +; RV32I-NEXT: mv s9, a3 +; RV32I-NEXT: bnez s1, .LBB9_57 +; RV32I-NEXT: j .LBB9_58 +; RV32I-NEXT: .LBB9_56: +; RV32I-NEXT: srl s9, a3, s1 +; RV32I-NEXT: and s9, t2, s9 +; RV32I-NEXT: or t3, s9, t3 +; RV32I-NEXT: mv s9, a3 +; RV32I-NEXT: beqz s1, .LBB9_58 +; RV32I-NEXT: .LBB9_57: +; RV32I-NEXT: mv s9, t3 +; RV32I-NEXT: .LBB9_58: +; RV32I-NEXT: li a1, 128 +; RV32I-NEXT: bltu a0, a1, .LBB9_63 +; RV32I-NEXT: # %bb.59: +; RV32I-NEXT: lw t3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez a0, .LBB9_64 +; RV32I-NEXT: .LBB9_60: +; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz a1, .LBB9_65 +; RV32I-NEXT: .LBB9_61: +; RV32I-NEXT: li s7, 64 +; RV32I-NEXT: bltz s6, .LBB9_66 +; RV32I-NEXT: .LBB9_62: +; RV32I-NEXT: lw t4, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: bltu t5, s7, .LBB9_67 +; RV32I-NEXT: j .LBB9_68 +; RV32I-NEXT: .LBB9_63: +; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: and t3, a1, t4 +; RV32I-NEXT: or s9, t0, t3 +; RV32I-NEXT: lw t3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: beqz a0, .LBB9_60 +; RV32I-NEXT: .LBB9_64: +; RV32I-NEXT: mv s3, s9 +; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgez a1, .LBB9_61 +; RV32I-NEXT: .LBB9_65: +; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a5, s8, a1 +; RV32I-NEXT: or a5, s7, a5 +; RV32I-NEXT: li s7, 64 +; RV32I-NEXT: bgez s6, .LBB9_62 +; RV32I-NEXT: .LBB9_66: +; RV32I-NEXT: srl t0, s11, s0 +; RV32I-NEXT: or t0, s5, t0 +; RV32I-NEXT: lw t4, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgeu t5, s7, .LBB9_68 +; RV32I-NEXT: .LBB9_67: +; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: slti a5, a1, 0 +; RV32I-NEXT: neg a5, a5 +; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a5, a5, a1 +; RV32I-NEXT: or a5, t0, a5 +; RV32I-NEXT: .LBB9_68: +; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: bnez t5, .LBB9_71 +; RV32I-NEXT: # %bb.69: +; RV32I-NEXT: li a1, 128 +; RV32I-NEXT: bltu a0, a1, .LBB9_72 +; RV32I-NEXT: .LBB9_70: +; RV32I-NEXT: srl a5, a4, s1 +; RV32I-NEXT: and a5, t2, a5 +; RV32I-NEXT: lw a1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a5, a1, a5 +; RV32I-NEXT: lw t5, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez a0, .LBB9_73 +; RV32I-NEXT: j .LBB9_74 +; RV32I-NEXT: .LBB9_71: +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: li a1, 128 +; RV32I-NEXT: bgeu a0, a1, .LBB9_70 +; RV32I-NEXT: .LBB9_72: +; RV32I-NEXT: srl a5, a6, a0 +; RV32I-NEXT: and a5, s2, a5 +; RV32I-NEXT: and a5, s10, a5 +; RV32I-NEXT: or a5, a5, t0 +; RV32I-NEXT: lw t5, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: beqz a0, .LBB9_74 +; RV32I-NEXT: .LBB9_73: +; RV32I-NEXT: mv a6, a5 +; RV32I-NEXT: .LBB9_74: +; RV32I-NEXT: bltz s4, .LBB9_77 +; RV32I-NEXT: # %bb.75: +; RV32I-NEXT: srl a5, a4, s4 +; RV32I-NEXT: bgez t6, .LBB9_78 +; RV32I-NEXT: .LBB9_76: +; RV32I-NEXT: srl t0, a7, a0 +; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t2, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t2, a1, t2 +; RV32I-NEXT: or t0, t0, t2 +; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltu a0, s7, .LBB9_79 +; RV32I-NEXT: j .LBB9_80 +; RV32I-NEXT: .LBB9_77: +; RV32I-NEXT: lw a5, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a5, a5, t5 +; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t0, t3, a1 +; RV32I-NEXT: or a5, a5, t0 +; RV32I-NEXT: bltz t6, .LBB9_76 +; RV32I-NEXT: .LBB9_78: +; RV32I-NEXT: srl t0, a3, t6 +; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgeu a0, s7, .LBB9_80 +; RV32I-NEXT: .LBB9_79: +; RV32I-NEXT: and a5, a1, t4 +; RV32I-NEXT: or a5, t0, a5 +; RV32I-NEXT: .LBB9_80: +; RV32I-NEXT: bnez a0, .LBB9_84 +; RV32I-NEXT: # %bb.81: +; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz a1, .LBB9_85 +; RV32I-NEXT: .LBB9_82: +; RV32I-NEXT: sltiu a5, a0, 128 +; RV32I-NEXT: bltu a0, s7, .LBB9_86 +; RV32I-NEXT: .LBB9_83: +; RV32I-NEXT: srl t0, a4, t5 +; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: and t2, a1, t0 +; RV32I-NEXT: neg t0, a5 +; RV32I-NEXT: bnez a0, .LBB9_87 +; RV32I-NEXT: j .LBB9_88 +; RV32I-NEXT: .LBB9_84: +; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgez a1, .LBB9_82 +; RV32I-NEXT: .LBB9_85: +; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a5, s11, a1 +; RV32I-NEXT: or t4, s5, a5 +; RV32I-NEXT: sltiu a5, a0, 128 +; RV32I-NEXT: bgeu a0, s7, .LBB9_83 +; RV32I-NEXT: .LBB9_86: +; RV32I-NEXT: srl t0, a3, a0 +; RV32I-NEXT: and t0, s2, t0 +; RV32I-NEXT: or t2, t0, t4 +; RV32I-NEXT: neg t0, a5 +; RV32I-NEXT: beqz a0, .LBB9_88 +; RV32I-NEXT: .LBB9_87: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: .LBB9_88: +; RV32I-NEXT: and a5, t0, a7 +; RV32I-NEXT: and a3, t0, a3 +; RV32I-NEXT: bltz t6, .LBB9_90 +; RV32I-NEXT: # %bb.89: +; RV32I-NEXT: srl a7, a4, t6 +; RV32I-NEXT: j .LBB9_91 +; RV32I-NEXT: .LBB9_90: +; RV32I-NEXT: lw a7, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a7, a7, a0 +; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t2, t3, a1 +; RV32I-NEXT: or a7, a7, t2 +; RV32I-NEXT: .LBB9_91: +; RV32I-NEXT: and a7, s10, a7 +; RV32I-NEXT: and a7, t0, a7 +; RV32I-NEXT: srl a0, a4, a0 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: and a0, s10, a0 +; RV32I-NEXT: and a0, t0, a0 +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: sb a0, 28(a2) +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: sb a1, 27(a2) +; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: sb a1, 26(a2) +; RV32I-NEXT: srli a1, a7, 8 +; RV32I-NEXT: sb a1, 25(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 31(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 30(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 29(a2) +; RV32I-NEXT: sb a5, 16(a2) +; RV32I-NEXT: srli a0, a5, 24 +; RV32I-NEXT: sb a0, 19(a2) +; RV32I-NEXT: srli a0, a5, 16 +; RV32I-NEXT: sb a0, 18(a2) +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a5, 17(a2) +; RV32I-NEXT: sb a3, 20(a2) +; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: sb a0, 23(a2) +; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: sb a0, 22(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 21(a2) +; RV32I-NEXT: sb t1, 0(a2) +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: srli a0, t1, 24 +; RV32I-NEXT: sb a0, 3(a2) +; RV32I-NEXT: srli a0, t1, 16 +; RV32I-NEXT: sb a0, 2(a2) +; RV32I-NEXT: srli a0, t1, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb s3, 4(a2) +; RV32I-NEXT: sb ra, 8(a2) +; RV32I-NEXT: srli a0, a6, 24 +; RV32I-NEXT: sb a0, 15(a2) +; RV32I-NEXT: srli a0, a6, 16 +; RV32I-NEXT: sb a0, 14(a2) +; RV32I-NEXT: srli a0, a6, 8 +; RV32I-NEXT: sb a0, 13(a2) +; RV32I-NEXT: srli a0, s3, 24 +; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: srli a0, s3, 16 +; RV32I-NEXT: sb a0, 6(a2) +; RV32I-NEXT: srli a0, s3, 8 +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: srli a0, ra, 24 +; RV32I-NEXT: sb a0, 11(a2) +; RV32I-NEXT: srli a0, ra, 16 +; RV32I-NEXT: sb a0, 10(a2) +; RV32I-NEXT: srli a0, ra, 8 +; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: ret + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = lshr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: shl_32bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 17(a0) +; RV64I-NEXT: lbu a4, 16(a0) +; RV64I-NEXT: lbu a5, 18(a0) +; RV64I-NEXT: lbu a6, 19(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: lbu a3, 21(a0) +; RV64I-NEXT: lbu a4, 20(a0) +; RV64I-NEXT: lbu a6, 22(a0) +; RV64I-NEXT: lbu a7, 23(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a6, a4, a3 +; RV64I-NEXT: lbu a3, 25(a0) +; RV64I-NEXT: lbu a4, 24(a0) +; RV64I-NEXT: lbu a7, 26(a0) +; RV64I-NEXT: lbu t0, 27(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a4, t0, a7 +; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: lbu a3, 29(a0) +; RV64I-NEXT: lbu a4, 28(a0) +; RV64I-NEXT: lbu t0, 30(a0) +; RV64I-NEXT: lbu t1, 31(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a4, t1, t0 +; RV64I-NEXT: or t0, a4, a3 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu t1, 2(a0) +; RV64I-NEXT: lbu t2, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or a4, t2, t1 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu t1, 4(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: lbu t3, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 9(a0) +; RV64I-NEXT: lbu t1, 8(a0) +; RV64I-NEXT: lbu t2, 10(a0) +; RV64I-NEXT: lbu t3, 11(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: lbu t1, 13(a0) +; RV64I-NEXT: lbu t2, 12(a0) +; RV64I-NEXT: lbu t3, 14(a0) +; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t1, t1, t2 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t3 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a4, a0, a4 +; RV64I-NEXT: lbu a0, 1(a1) +; RV64I-NEXT: lbu t1, 0(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: or t2, t3, t2 +; RV64I-NEXT: or t2, t2, a0 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t1, t1, t4 +; RV64I-NEXT: lbu t3, 6(a1) +; RV64I-NEXT: lbu t4, 7(a1) +; RV64I-NEXT: slli a0, a6, 32 +; RV64I-NEXT: slli a1, t0, 32 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or a6, t4, t3 +; RV64I-NEXT: or a6, a6, t1 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a6, a6, t2 +; RV64I-NEXT: addi t1, a6, -128 +; RV64I-NEXT: addi t2, a6, -192 +; RV64I-NEXT: srli t0, a3, 1 +; RV64I-NEXT: bltz t2, .LBB10_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: sll t3, a3, t2 +; RV64I-NEXT: j .LBB10_3 +; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: sll t3, a4, t1 +; RV64I-NEXT: xori t4, t1, 63 +; RV64I-NEXT: srl t4, t0, t4 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: .LBB10_3: +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: addi a7, a6, -64 +; RV64I-NEXT: xori a5, a6, 63 +; RV64I-NEXT: bltz a7, .LBB10_5 +; RV64I-NEXT: # %bb.4: +; RV64I-NEXT: sll s1, a0, a7 +; RV64I-NEXT: j .LBB10_6 +; RV64I-NEXT: .LBB10_5: +; RV64I-NEXT: sll t4, a1, a6 +; RV64I-NEXT: srli t5, a0, 1 +; RV64I-NEXT: srl t5, t5, a5 +; RV64I-NEXT: or s1, t4, t5 +; RV64I-NEXT: .LBB10_6: +; RV64I-NEXT: negw t6, a6 +; RV64I-NEXT: srl t4, a4, t6 +; RV64I-NEXT: li s0, 64 +; RV64I-NEXT: li t5, 128 +; RV64I-NEXT: sub s0, s0, a6 +; RV64I-NEXT: bltu a6, t5, .LBB10_12 +; RV64I-NEXT: # %bb.7: +; RV64I-NEXT: bnez a6, .LBB10_13 +; RV64I-NEXT: .LBB10_8: +; RV64I-NEXT: bgez s0, .LBB10_10 +; RV64I-NEXT: .LBB10_9: +; RV64I-NEXT: srl t3, a3, t6 +; RV64I-NEXT: slli t4, a4, 1 +; RV64I-NEXT: sub t6, t5, a6 +; RV64I-NEXT: xori t6, t6, 63 +; RV64I-NEXT: sll t4, t4, t6 +; RV64I-NEXT: or t4, t3, t4 +; RV64I-NEXT: .LBB10_10: +; RV64I-NEXT: slti t3, a7, 0 +; RV64I-NEXT: neg t3, t3 +; RV64I-NEXT: bltu a6, t5, .LBB10_14 +; RV64I-NEXT: # %bb.11: +; RV64I-NEXT: sll t1, a3, t1 +; RV64I-NEXT: slti t2, t2, 0 +; RV64I-NEXT: neg t2, t2 +; RV64I-NEXT: and t1, t2, t1 +; RV64I-NEXT: bnez a6, .LBB10_15 +; RV64I-NEXT: j .LBB10_16 +; RV64I-NEXT: .LBB10_12: +; RV64I-NEXT: slti t3, s0, 0 +; RV64I-NEXT: neg t3, t3 +; RV64I-NEXT: and t3, t3, t4 +; RV64I-NEXT: or t3, s1, t3 +; RV64I-NEXT: beqz a6, .LBB10_8 +; RV64I-NEXT: .LBB10_13: +; RV64I-NEXT: mv a1, t3 +; RV64I-NEXT: bltz s0, .LBB10_9 +; RV64I-NEXT: j .LBB10_10 +; RV64I-NEXT: .LBB10_14: +; RV64I-NEXT: sll t1, a0, a6 +; RV64I-NEXT: and t1, t3, t1 +; RV64I-NEXT: or t1, t1, t4 +; RV64I-NEXT: beqz a6, .LBB10_16 +; RV64I-NEXT: .LBB10_15: +; RV64I-NEXT: mv a0, t1 +; RV64I-NEXT: .LBB10_16: +; RV64I-NEXT: bltz a7, .LBB10_18 +; RV64I-NEXT: # %bb.17: +; RV64I-NEXT: sll a4, a3, a7 +; RV64I-NEXT: j .LBB10_19 +; RV64I-NEXT: .LBB10_18: +; RV64I-NEXT: sll a4, a4, a6 +; RV64I-NEXT: srl a5, t0, a5 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: .LBB10_19: +; RV64I-NEXT: sltiu a5, a6, 128 +; RV64I-NEXT: neg a5, a5 +; RV64I-NEXT: and a4, a5, a4 +; RV64I-NEXT: sll a3, a3, a6 +; RV64I-NEXT: and a3, t3, a3 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: sb a3, 0(a2) +; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: srli a5, a3, 56 +; RV64I-NEXT: sb a5, 7(a2) +; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: sb a5, 6(a2) +; RV64I-NEXT: srli a5, a3, 40 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: srli a5, a3, 32 +; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: srli a5, a3, 24 +; RV64I-NEXT: sb a5, 3(a2) +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: sb a5, 2(a2) +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a3, 1(a2) +; RV64I-NEXT: srli a3, a4, 56 +; RV64I-NEXT: sb a3, 15(a2) +; RV64I-NEXT: srli a3, a4, 48 +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: srli a3, a4, 40 +; RV64I-NEXT: sb a3, 13(a2) +; RV64I-NEXT: srli a3, a4, 32 +; RV64I-NEXT: sb a3, 12(a2) +; RV64I-NEXT: srli a3, a4, 24 +; RV64I-NEXT: sb a3, 11(a2) +; RV64I-NEXT: srli a3, a4, 16 +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: sb a0, 16(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: sb a3, 31(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: sb a3, 30(a2) +; RV64I-NEXT: srli a3, a1, 40 +; RV64I-NEXT: sb a3, 29(a2) +; RV64I-NEXT: srli a3, a1, 32 +; RV64I-NEXT: sb a3, 28(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: sb a3, 27(a2) +; RV64I-NEXT: srli a3, a1, 16 +; RV64I-NEXT: sb a3, 26(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 25(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 23(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 22(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 21(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 20(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 19(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 18(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 17(a2) +; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32I-LABEL: shl_32bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -128 +; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a7, 24(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) +; RV32I-NEXT: lbu t0, 28(a0) +; RV32I-NEXT: lbu s0, 29(a0) +; RV32I-NEXT: lbu s1, 30(a0) +; RV32I-NEXT: lbu s3, 31(a0) +; RV32I-NEXT: lbu a6, 16(a0) +; RV32I-NEXT: lbu t6, 17(a0) +; RV32I-NEXT: lbu s2, 18(a0) +; RV32I-NEXT: lbu s6, 19(a0) +; RV32I-NEXT: lbu s4, 20(a0) +; RV32I-NEXT: lbu t1, 21(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu s5, 23(a0) +; RV32I-NEXT: lbu a3, 9(a0) +; RV32I-NEXT: lbu a4, 8(a0) +; RV32I-NEXT: lbu a5, 10(a0) +; RV32I-NEXT: lbu s7, 11(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli s7, s7, 24 +; RV32I-NEXT: or a4, s7, a5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 13(a0) +; RV32I-NEXT: lbu a5, 12(a0) +; RV32I-NEXT: lbu s7, 14(a0) +; RV32I-NEXT: lbu s9, 15(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or s8, a4, a5 +; RV32I-NEXT: slli s7, s7, 16 +; RV32I-NEXT: slli s9, s9, 24 +; RV32I-NEXT: or s9, s9, s7 +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu s7, 2(a0) +; RV32I-NEXT: lbu s10, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli s7, s7, 16 +; RV32I-NEXT: slli s10, s10, 24 +; RV32I-NEXT: or a5, s10, s7 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 5(a0) +; RV32I-NEXT: lbu s7, 4(a0) +; RV32I-NEXT: lbu s10, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, s7 +; RV32I-NEXT: slli s10, s10, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, s10 +; RV32I-NEXT: or s10, a0, a5 +; RV32I-NEXT: lbu a0, 1(a1) +; RV32I-NEXT: lbu a5, 0(a1) +; RV32I-NEXT: lbu s7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: slli s7, s7, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, s7 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: addi a5, a0, -192 +; RV32I-NEXT: addi a1, a0, -224 +; RV32I-NEXT: srli s7, a4, 1 +; RV32I-NEXT: sw s10, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz a1, .LBB10_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll s7, a4, a1 +; RV32I-NEXT: j .LBB10_3 +; RV32I-NEXT: .LBB10_2: +; RV32I-NEXT: sll a1, s10, a5 +; RV32I-NEXT: xori a5, a5, 31 +; RV32I-NEXT: srl a5, s7, a5 +; RV32I-NEXT: or s7, a1, a5 +; RV32I-NEXT: .LBB10_3: +; RV32I-NEXT: slli s10, t6, 8 +; RV32I-NEXT: slli ra, s2, 16 +; RV32I-NEXT: slli s6, s6, 24 +; RV32I-NEXT: or t6, s9, s8 +; RV32I-NEXT: addi s2, a0, -128 +; RV32I-NEXT: srli a1, a3, 1 +; RV32I-NEXT: addi s11, a0, -160 +; RV32I-NEXT: xori s8, s2, 31 +; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s11, .LBB10_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: sll s8, a3, s11 +; RV32I-NEXT: j .LBB10_6 +; RV32I-NEXT: .LBB10_5: +; RV32I-NEXT: sll a5, t6, s2 +; RV32I-NEXT: srl s8, a1, s8 +; RV32I-NEXT: or s8, a5, s8 +; RV32I-NEXT: .LBB10_6: +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli a5, t2, 16 +; RV32I-NEXT: slli s5, s5, 24 +; RV32I-NEXT: or a6, s10, a6 +; RV32I-NEXT: or s6, s6, ra +; RV32I-NEXT: neg s10, a0 +; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl s9, t2, s10 +; RV32I-NEXT: li t2, 160 +; RV32I-NEXT: li ra, 64 +; RV32I-NEXT: sub t2, t2, a0 +; RV32I-NEXT: li a1, 64 +; RV32I-NEXT: sw s9, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t2, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgeu s2, ra, .LBB10_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: slti t2, t2, 0 +; RV32I-NEXT: neg t2, t2 +; RV32I-NEXT: and t2, t2, s9 +; RV32I-NEXT: or s7, s8, t2 +; RV32I-NEXT: .LBB10_8: +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t5, t5, 24 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: slli s1, s1, 16 +; RV32I-NEXT: slli s3, s3, 24 +; RV32I-NEXT: or s4, t1, s4 +; RV32I-NEXT: or s5, s5, a5 +; RV32I-NEXT: or ra, s6, a6 +; RV32I-NEXT: sw t6, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, t6 +; RV32I-NEXT: beqz s2, .LBB10_10 +; RV32I-NEXT: # %bb.9: +; RV32I-NEXT: mv a6, s7 +; RV32I-NEXT: .LBB10_10: +; RV32I-NEXT: or a5, t3, a7 +; RV32I-NEXT: or a7, t5, t4 +; RV32I-NEXT: or t0, s0, t0 +; RV32I-NEXT: or t1, s3, s1 +; RV32I-NEXT: or s6, s5, s4 +; RV32I-NEXT: addi t4, a0, -64 +; RV32I-NEXT: srli s0, ra, 1 +; RV32I-NEXT: addi t6, a0, -96 +; RV32I-NEXT: xori t3, t4, 31 +; RV32I-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz t6, .LBB10_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sll t3, ra, t6 +; RV32I-NEXT: j .LBB10_13 +; RV32I-NEXT: .LBB10_12: +; RV32I-NEXT: sll t2, s6, t4 +; RV32I-NEXT: srl t3, s0, t3 +; RV32I-NEXT: or t3, t2, t3 +; RV32I-NEXT: .LBB10_13: +; RV32I-NEXT: or a7, a7, a5 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: addi t5, a0, -32 +; RV32I-NEXT: xori s4, a0, 31 +; RV32I-NEXT: bltz t5, .LBB10_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: sll a5, a7, t5 +; RV32I-NEXT: j .LBB10_16 +; RV32I-NEXT: .LBB10_15: +; RV32I-NEXT: sll a5, t0, a0 +; RV32I-NEXT: srli t1, a7, 1 +; RV32I-NEXT: srl t1, t1, s4 +; RV32I-NEXT: or a5, a5, t1 +; RV32I-NEXT: .LBB10_16: +; RV32I-NEXT: srl s1, s6, s10 +; RV32I-NEXT: li t1, 32 +; RV32I-NEXT: sub t2, t1, a0 +; RV32I-NEXT: sw t2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: slti t2, t2, 0 +; RV32I-NEXT: neg s9, t2 +; RV32I-NEXT: sw s1, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgeu a0, a1, .LBB10_18 +; RV32I-NEXT: # %bb.17: +; RV32I-NEXT: and t2, s9, s1 +; RV32I-NEXT: or t3, a5, t2 +; RV32I-NEXT: .LBB10_18: +; RV32I-NEXT: sw t4, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s1, t0 +; RV32I-NEXT: beqz a0, .LBB10_20 +; RV32I-NEXT: # %bb.19: +; RV32I-NEXT: mv s1, t3 +; RV32I-NEXT: .LBB10_20: +; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a1, a1, s10 +; RV32I-NEXT: li t2, 96 +; RV32I-NEXT: sub t4, t2, a0 +; RV32I-NEXT: slti t2, t4, 0 +; RV32I-NEXT: neg t3, t2 +; RV32I-NEXT: li a5, 128 +; RV32I-NEXT: sub s7, a5, a0 +; RV32I-NEXT: sltiu t2, s7, 64 +; RV32I-NEXT: neg t2, t2 +; RV32I-NEXT: bgeu a0, a5, .LBB10_22 +; RV32I-NEXT: # %bb.21: +; RV32I-NEXT: and a6, t3, a1 +; RV32I-NEXT: and a6, t2, a6 +; RV32I-NEXT: or a6, s1, a6 +; RV32I-NEXT: .LBB10_22: +; RV32I-NEXT: lw s3, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a0, .LBB10_24 +; RV32I-NEXT: # %bb.23: +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: .LBB10_24: +; RV32I-NEXT: neg a6, s7 +; RV32I-NEXT: sub s8, t1, s7 +; RV32I-NEXT: sll t1, a3, a6 +; RV32I-NEXT: sw t2, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s8, .LBB10_27 +; RV32I-NEXT: # %bb.25: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li a1, 64 +; RV32I-NEXT: li a5, 64 +; RV32I-NEXT: bgeu s7, a1, .LBB10_28 +; RV32I-NEXT: .LBB10_26: +; RV32I-NEXT: lw t2, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: and t2, t3, t2 +; RV32I-NEXT: or t2, t2, a6 +; RV32I-NEXT: mv a6, s3 +; RV32I-NEXT: bnez s7, .LBB10_29 +; RV32I-NEXT: j .LBB10_30 +; RV32I-NEXT: .LBB10_27: +; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a6, a1, a6 +; RV32I-NEXT: li a1, 64 +; RV32I-NEXT: sub t2, a1, s7 +; RV32I-NEXT: xori t2, t2, 31 +; RV32I-NEXT: lw a5, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl t2, a5, t2 +; RV32I-NEXT: or a6, a6, t2 +; RV32I-NEXT: li a5, 64 +; RV32I-NEXT: bltu s7, a1, .LBB10_26 +; RV32I-NEXT: .LBB10_28: +; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: and t2, s9, a1 +; RV32I-NEXT: mv a6, s3 +; RV32I-NEXT: beqz s7, .LBB10_30 +; RV32I-NEXT: .LBB10_29: +; RV32I-NEXT: mv a6, t2 +; RV32I-NEXT: .LBB10_30: +; RV32I-NEXT: bltz t5, .LBB10_32 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: sll s0, ra, t5 +; RV32I-NEXT: j .LBB10_33 +; RV32I-NEXT: .LBB10_32: +; RV32I-NEXT: sll t2, s6, a0 +; RV32I-NEXT: srl t3, s0, s4 +; RV32I-NEXT: or s0, t2, t3 +; RV32I-NEXT: .LBB10_33: +; RV32I-NEXT: sltiu t3, a0, 64 +; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s11, .LBB10_35 +; RV32I-NEXT: # %bb.34: +; RV32I-NEXT: sll a1, a4, s11 +; RV32I-NEXT: j .LBB10_36 +; RV32I-NEXT: .LBB10_35: +; RV32I-NEXT: sll t2, s3, s2 +; RV32I-NEXT: lw s4, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a1, s4, a1 +; RV32I-NEXT: or a1, t2, a1 +; RV32I-NEXT: .LBB10_36: +; RV32I-NEXT: neg s5, t3 +; RV32I-NEXT: sltiu t2, s2, 64 +; RV32I-NEXT: neg t3, t2 +; RV32I-NEXT: li t2, 128 +; RV32I-NEXT: bltu a0, t2, .LBB10_38 +; RV32I-NEXT: # %bb.37: +; RV32I-NEXT: and a1, t3, a1 +; RV32I-NEXT: mv s0, s6 +; RV32I-NEXT: bnez a0, .LBB10_39 +; RV32I-NEXT: j .LBB10_40 +; RV32I-NEXT: .LBB10_38: +; RV32I-NEXT: and a1, s5, s0 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: mv s0, s6 +; RV32I-NEXT: beqz a0, .LBB10_40 +; RV32I-NEXT: .LBB10_39: +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: .LBB10_40: +; RV32I-NEXT: srl a1, a3, s10 +; RV32I-NEXT: lw a6, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: slli a6, a6, 1 +; RV32I-NEXT: sub t2, a5, a0 +; RV32I-NEXT: xori t2, t2, 31 +; RV32I-NEXT: lw s1, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw t2, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s1, .LBB10_42 +; RV32I-NEXT: # %bb.41: +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: j .LBB10_43 +; RV32I-NEXT: .LBB10_42: +; RV32I-NEXT: sll t2, a6, t2 +; RV32I-NEXT: or s4, a1, t2 +; RV32I-NEXT: .LBB10_43: +; RV32I-NEXT: srl s1, a4, s10 +; RV32I-NEXT: slli s3, s3, 1 +; RV32I-NEXT: xori s9, s7, 31 +; RV32I-NEXT: sw s3, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz t4, .LBB10_45 +; RV32I-NEXT: # %bb.44: +; RV32I-NEXT: mv s3, s1 +; RV32I-NEXT: lw t2, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltu s7, a5, .LBB10_46 +; RV32I-NEXT: j .LBB10_47 +; RV32I-NEXT: .LBB10_45: +; RV32I-NEXT: sll t2, s3, s9 +; RV32I-NEXT: mv s3, s1 +; RV32I-NEXT: or t2, s1, t2 +; RV32I-NEXT: bgeu s7, a5, .LBB10_47 +; RV32I-NEXT: .LBB10_46: +; RV32I-NEXT: slti s4, s8, 0 +; RV32I-NEXT: neg s4, s4 +; RV32I-NEXT: and t1, s4, t1 +; RV32I-NEXT: or s4, t2, t1 +; RV32I-NEXT: .LBB10_47: +; RV32I-NEXT: mv s8, a4 +; RV32I-NEXT: beqz s7, .LBB10_49 +; RV32I-NEXT: # %bb.48: +; RV32I-NEXT: mv s8, s4 +; RV32I-NEXT: .LBB10_49: +; RV32I-NEXT: slti t1, t5, 0 +; RV32I-NEXT: neg s7, t1 +; RV32I-NEXT: slti t1, s11, 0 +; RV32I-NEXT: neg t1, t1 +; RV32I-NEXT: li a5, 128 +; RV32I-NEXT: bltu a0, a5, .LBB10_51 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: sll t2, a4, s2 +; RV32I-NEXT: and t2, t1, t2 +; RV32I-NEXT: and t2, t3, t2 +; RV32I-NEXT: mv s11, ra +; RV32I-NEXT: bnez a0, .LBB10_52 +; RV32I-NEXT: j .LBB10_53 +; RV32I-NEXT: .LBB10_51: +; RV32I-NEXT: sll t2, ra, a0 +; RV32I-NEXT: and t2, s7, t2 +; RV32I-NEXT: and t2, s5, t2 +; RV32I-NEXT: or t2, t2, s8 +; RV32I-NEXT: mv s11, ra +; RV32I-NEXT: beqz a0, .LBB10_53 +; RV32I-NEXT: .LBB10_52: +; RV32I-NEXT: mv s11, t2 +; RV32I-NEXT: .LBB10_53: +; RV32I-NEXT: lw a5, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgez a5, .LBB10_55 +; RV32I-NEXT: # %bb.54: +; RV32I-NEXT: srl t2, ra, s10 +; RV32I-NEXT: slli s6, s6, 1 +; RV32I-NEXT: lw a5, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t3, s6, a5 +; RV32I-NEXT: or a5, t2, t3 +; RV32I-NEXT: sw a5, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: .LBB10_55: +; RV32I-NEXT: slti t2, t6, 0 +; RV32I-NEXT: neg s6, t2 +; RV32I-NEXT: li s10, 64 +; RV32I-NEXT: bltu a0, s10, .LBB10_57 +; RV32I-NEXT: # %bb.56: +; RV32I-NEXT: lw a5, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t2, ra, a5 +; RV32I-NEXT: and t2, s6, t2 +; RV32I-NEXT: j .LBB10_58 +; RV32I-NEXT: .LBB10_57: +; RV32I-NEXT: sll t2, a7, a0 +; RV32I-NEXT: and t2, s7, t2 +; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t2, t2, a5 +; RV32I-NEXT: .LBB10_58: +; RV32I-NEXT: lw s4, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: mv t3, a7 +; RV32I-NEXT: beqz a0, .LBB10_60 +; RV32I-NEXT: # %bb.59: +; RV32I-NEXT: mv t3, t2 +; RV32I-NEXT: .LBB10_60: +; RV32I-NEXT: bgez t4, .LBB10_62 +; RV32I-NEXT: # %bb.61: +; RV32I-NEXT: sll a5, a6, s9 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: .LBB10_62: +; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: lw t4, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz a1, .LBB10_65 +; RV32I-NEXT: # %bb.63: +; RV32I-NEXT: mv a1, s8 +; RV32I-NEXT: bgeu s2, s10, .LBB10_66 +; RV32I-NEXT: .LBB10_64: +; RV32I-NEXT: sll a6, a3, s2 +; RV32I-NEXT: and a6, t1, a6 +; RV32I-NEXT: or a6, a6, a1 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: bnez s2, .LBB10_67 +; RV32I-NEXT: j .LBB10_68 +; RV32I-NEXT: .LBB10_65: +; RV32I-NEXT: li a1, 192 +; RV32I-NEXT: sub a1, a1, a0 +; RV32I-NEXT: xori a1, a1, 31 +; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a1, a5, a1 +; RV32I-NEXT: or a1, s1, a1 +; RV32I-NEXT: bltu s2, s10, .LBB10_64 +; RV32I-NEXT: .LBB10_66: +; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a1, a4, a1 +; RV32I-NEXT: lw a5, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: slti a6, a5, 0 +; RV32I-NEXT: neg a6, a6 +; RV32I-NEXT: and a6, a6, a1 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: beqz s2, .LBB10_68 +; RV32I-NEXT: .LBB10_67: +; RV32I-NEXT: mv a1, a6 +; RV32I-NEXT: .LBB10_68: +; RV32I-NEXT: li a5, 128 +; RV32I-NEXT: bltu a0, a5, .LBB10_73 +; RV32I-NEXT: # %bb.69: +; RV32I-NEXT: bnez a0, .LBB10_74 +; RV32I-NEXT: .LBB10_70: +; RV32I-NEXT: bltz t6, .LBB10_75 +; RV32I-NEXT: .LBB10_71: +; RV32I-NEXT: sll a1, a4, t6 +; RV32I-NEXT: lw t3, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgez t5, .LBB10_76 +; RV32I-NEXT: .LBB10_72: +; RV32I-NEXT: sll a5, t3, a0 +; RV32I-NEXT: lw a6, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t1, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a6, a6, t1 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: bltu a0, s10, .LBB10_77 +; RV32I-NEXT: j .LBB10_78 +; RV32I-NEXT: .LBB10_73: +; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a1, a5, a1 +; RV32I-NEXT: or a1, t3, a1 +; RV32I-NEXT: beqz a0, .LBB10_70 +; RV32I-NEXT: .LBB10_74: +; RV32I-NEXT: mv a7, a1 +; RV32I-NEXT: bgez t6, .LBB10_71 +; RV32I-NEXT: .LBB10_75: +; RV32I-NEXT: sll a1, t2, t4 +; RV32I-NEXT: lw a5, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a5, s4, a5 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: lw t3, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz t5, .LBB10_72 +; RV32I-NEXT: .LBB10_76: +; RV32I-NEXT: sll a5, a3, t5 +; RV32I-NEXT: bgeu a0, s10, .LBB10_78 +; RV32I-NEXT: .LBB10_77: +; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a1, a1, s8 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: .LBB10_78: +; RV32I-NEXT: bnez a0, .LBB10_82 +; RV32I-NEXT: # %bb.79: +; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz a1, .LBB10_83 +; RV32I-NEXT: .LBB10_80: +; RV32I-NEXT: sltiu a1, a0, 128 +; RV32I-NEXT: bltu a0, s10, .LBB10_84 +; RV32I-NEXT: .LBB10_81: +; RV32I-NEXT: sll a5, a4, t4 +; RV32I-NEXT: and a6, s6, a5 +; RV32I-NEXT: neg a5, a1 +; RV32I-NEXT: bnez a0, .LBB10_85 +; RV32I-NEXT: j .LBB10_86 +; RV32I-NEXT: .LBB10_82: +; RV32I-NEXT: mv t3, a1 +; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgez a1, .LBB10_80 +; RV32I-NEXT: .LBB10_83: +; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a1, a5, a1 +; RV32I-NEXT: or s8, s1, a1 +; RV32I-NEXT: sltiu a1, a0, 128 +; RV32I-NEXT: bgeu a0, s10, .LBB10_81 +; RV32I-NEXT: .LBB10_84: +; RV32I-NEXT: sll a5, a3, a0 +; RV32I-NEXT: and a5, s7, a5 +; RV32I-NEXT: or a6, a5, s8 +; RV32I-NEXT: neg a5, a1 +; RV32I-NEXT: beqz a0, .LBB10_86 +; RV32I-NEXT: .LBB10_85: +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: .LBB10_86: +; RV32I-NEXT: and a6, a5, t3 +; RV32I-NEXT: and a1, a5, a3 +; RV32I-NEXT: bltz t5, .LBB10_88 +; RV32I-NEXT: # %bb.87: +; RV32I-NEXT: sll a3, a4, t5 +; RV32I-NEXT: j .LBB10_89 +; RV32I-NEXT: .LBB10_88: +; RV32I-NEXT: sll a3, t2, a0 +; RV32I-NEXT: lw t1, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl t1, s4, t1 +; RV32I-NEXT: or a3, a3, t1 +; RV32I-NEXT: .LBB10_89: +; RV32I-NEXT: and a3, s5, a3 +; RV32I-NEXT: and a3, a5, a3 +; RV32I-NEXT: sll a0, a4, a0 +; RV32I-NEXT: and a0, s7, a0 +; RV32I-NEXT: and a0, s5, a0 +; RV32I-NEXT: and a0, a5, a0 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: srli a4, a0, 24 +; RV32I-NEXT: sb a4, 3(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: sb a0, 6(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 5(a2) +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb a1, 8(a2) +; RV32I-NEXT: srli a0, a6, 24 +; RV32I-NEXT: sb a0, 15(a2) +; RV32I-NEXT: srli a0, a6, 16 +; RV32I-NEXT: sb a0, 14(a2) +; RV32I-NEXT: srli a0, a6, 8 +; RV32I-NEXT: sb a0, 13(a2) +; RV32I-NEXT: sb t0, 28(a2) +; RV32I-NEXT: srli a0, a1, 24 +; RV32I-NEXT: sb a0, 11(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: sb a0, 10(a2) +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 9(a2) +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: srli a0, t0, 24 +; RV32I-NEXT: sb a0, 31(a2) +; RV32I-NEXT: srli a0, t0, 16 +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: srli a0, t0, 8 +; RV32I-NEXT: sb a0, 29(a2) +; RV32I-NEXT: sb s11, 16(a2) +; RV32I-NEXT: srli a0, a7, 24 +; RV32I-NEXT: sb a0, 27(a2) +; RV32I-NEXT: srli a0, a7, 16 +; RV32I-NEXT: sb a0, 26(a2) +; RV32I-NEXT: srli a0, a7, 8 +; RV32I-NEXT: sb a0, 25(a2) +; RV32I-NEXT: srli a0, s11, 24 +; RV32I-NEXT: sb a0, 19(a2) +; RV32I-NEXT: srli a0, s11, 16 +; RV32I-NEXT: sb a0, 18(a2) +; RV32I-NEXT: srli a0, s11, 8 +; RV32I-NEXT: sb a0, 17(a2) +; RV32I-NEXT: sb s0, 20(a2) +; RV32I-NEXT: srli a0, s0, 24 +; RV32I-NEXT: sb a0, 23(a2) +; RV32I-NEXT: srli a0, s0, 16 +; RV32I-NEXT: sb a0, 22(a2) +; RV32I-NEXT: srli s0, s0, 8 +; RV32I-NEXT: sb s0, 21(a2) +; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: ret + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = shl i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; RV64I-LABEL: ashr_32bytes: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd s0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 9(a0) +; RV64I-NEXT: lbu a4, 8(a0) +; RV64I-NEXT: lbu a5, 10(a0) +; RV64I-NEXT: lbu a6, 11(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: lbu a3, 13(a0) +; RV64I-NEXT: lbu a4, 12(a0) +; RV64I-NEXT: lbu a5, 14(a0) +; RV64I-NEXT: lbu a6, 15(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a5, 0(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, t0, a6 +; RV64I-NEXT: or t1, a5, a3 +; RV64I-NEXT: lbu a3, 5(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu t0, 7(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, t0, a6 +; RV64I-NEXT: or t0, a5, a3 +; RV64I-NEXT: lbu a3, 25(a0) +; RV64I-NEXT: lbu a5, 24(a0) +; RV64I-NEXT: lbu a6, 26(a0) +; RV64I-NEXT: lbu t2, 27(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or a5, t2, a6 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: lbu a5, 29(a0) +; RV64I-NEXT: lbu a6, 28(a0) +; RV64I-NEXT: lbu t2, 30(a0) +; RV64I-NEXT: lbu t3, 31(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: or a6, t3, t2 +; RV64I-NEXT: or a6, a6, a5 +; RV64I-NEXT: slli a5, a6, 32 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: lbu a5, 17(a0) +; RV64I-NEXT: lbu t2, 16(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or t2, t4, t3 +; RV64I-NEXT: or a5, t2, a5 +; RV64I-NEXT: lbu t2, 21(a0) +; RV64I-NEXT: lbu t3, 20(a0) +; RV64I-NEXT: lbu t4, 22(a0) +; RV64I-NEXT: lbu a0, 23(a0) +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or t2, t2, t3 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t4 +; RV64I-NEXT: or a0, a0, t2 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a5, a0, a5 +; RV64I-NEXT: lbu a0, 1(a1) +; RV64I-NEXT: lbu t2, 0(a1) +; RV64I-NEXT: lbu t3, 2(a1) +; RV64I-NEXT: lbu t4, 3(a1) +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a0, a0, t2 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: lbu t2, 5(a1) +; RV64I-NEXT: lbu t5, 4(a1) +; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or t3, t3, a0 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or t2, t2, t5 +; RV64I-NEXT: lbu t4, 6(a1) +; RV64I-NEXT: lbu t5, 7(a1) +; RV64I-NEXT: slli a0, a4, 32 +; RV64I-NEXT: slli a1, t0, 32 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: slli t5, t5, 24 +; RV64I-NEXT: or a4, t5, t4 +; RV64I-NEXT: or a4, a4, t2 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a4, a4, t3 +; RV64I-NEXT: addi t3, a4, -128 +; RV64I-NEXT: addi t4, a4, -192 +; RV64I-NEXT: slli t0, a3, 1 +; RV64I-NEXT: bltz t4, .LBB11_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: sra t6, a3, t4 +; RV64I-NEXT: j .LBB11_3 +; RV64I-NEXT: .LBB11_2: +; RV64I-NEXT: srl t2, a5, t3 +; RV64I-NEXT: xori t5, t3, 63 +; RV64I-NEXT: sll t5, t0, t5 +; RV64I-NEXT: or t6, t2, t5 +; RV64I-NEXT: .LBB11_3: +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: addi a7, a4, -64 +; RV64I-NEXT: xori t2, a4, 63 +; RV64I-NEXT: bltz a7, .LBB11_5 +; RV64I-NEXT: # %bb.4: +; RV64I-NEXT: srl s2, a0, a7 +; RV64I-NEXT: j .LBB11_6 +; RV64I-NEXT: .LBB11_5: +; RV64I-NEXT: srl t1, a1, a4 +; RV64I-NEXT: slli t5, a0, 1 +; RV64I-NEXT: sll t5, t5, t2 +; RV64I-NEXT: or s2, t1, t5 +; RV64I-NEXT: .LBB11_6: +; RV64I-NEXT: negw s0, a4 +; RV64I-NEXT: sll t5, a5, s0 +; RV64I-NEXT: li s1, 64 +; RV64I-NEXT: li t1, 128 +; RV64I-NEXT: sub s1, s1, a4 +; RV64I-NEXT: bltu a4, t1, .LBB11_11 +; RV64I-NEXT: # %bb.7: +; RV64I-NEXT: bnez a4, .LBB11_12 +; RV64I-NEXT: .LBB11_8: +; RV64I-NEXT: bltz s1, .LBB11_13 +; RV64I-NEXT: .LBB11_9: +; RV64I-NEXT: sraiw a6, a6, 31 +; RV64I-NEXT: bltz t4, .LBB11_14 +; RV64I-NEXT: .LBB11_10: +; RV64I-NEXT: mv t3, a6 +; RV64I-NEXT: bltu a4, t1, .LBB11_15 +; RV64I-NEXT: j .LBB11_16 +; RV64I-NEXT: .LBB11_11: +; RV64I-NEXT: slti t6, s1, 0 +; RV64I-NEXT: neg t6, t6 +; RV64I-NEXT: and t6, t6, t5 +; RV64I-NEXT: or t6, s2, t6 +; RV64I-NEXT: beqz a4, .LBB11_8 +; RV64I-NEXT: .LBB11_12: +; RV64I-NEXT: mv a1, t6 +; RV64I-NEXT: bgez s1, .LBB11_9 +; RV64I-NEXT: .LBB11_13: +; RV64I-NEXT: sll t5, a3, s0 +; RV64I-NEXT: srli t6, a5, 1 +; RV64I-NEXT: sub s0, t1, a4 +; RV64I-NEXT: xori s0, s0, 63 +; RV64I-NEXT: srl t6, t6, s0 +; RV64I-NEXT: or t5, t5, t6 +; RV64I-NEXT: sraiw a6, a6, 31 +; RV64I-NEXT: bgez t4, .LBB11_10 +; RV64I-NEXT: .LBB11_14: +; RV64I-NEXT: sra t3, a3, t3 +; RV64I-NEXT: bgeu a4, t1, .LBB11_16 +; RV64I-NEXT: .LBB11_15: +; RV64I-NEXT: slti t3, a7, 0 +; RV64I-NEXT: srl t4, a0, a4 +; RV64I-NEXT: neg t3, t3 +; RV64I-NEXT: and t3, t3, t4 +; RV64I-NEXT: or t3, t3, t5 +; RV64I-NEXT: .LBB11_16: +; RV64I-NEXT: bnez a4, .LBB11_19 +; RV64I-NEXT: # %bb.17: +; RV64I-NEXT: bltz a7, .LBB11_20 +; RV64I-NEXT: .LBB11_18: +; RV64I-NEXT: sra a5, a3, a7 +; RV64I-NEXT: bgeu a4, t1, .LBB11_21 +; RV64I-NEXT: j .LBB11_22 +; RV64I-NEXT: .LBB11_19: +; RV64I-NEXT: mv a0, t3 +; RV64I-NEXT: bgez a7, .LBB11_18 +; RV64I-NEXT: .LBB11_20: +; RV64I-NEXT: srl a5, a5, a4 +; RV64I-NEXT: sll t0, t0, t2 +; RV64I-NEXT: or a5, a5, t0 +; RV64I-NEXT: bltu a4, t1, .LBB11_22 +; RV64I-NEXT: .LBB11_21: +; RV64I-NEXT: mv a5, a6 +; RV64I-NEXT: .LBB11_22: +; RV64I-NEXT: bltz a7, .LBB11_24 +; RV64I-NEXT: # %bb.23: +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: bgeu a4, t1, .LBB11_25 +; RV64I-NEXT: j .LBB11_26 +; RV64I-NEXT: .LBB11_24: +; RV64I-NEXT: sra a3, a3, a4 +; RV64I-NEXT: bltu a4, t1, .LBB11_26 +; RV64I-NEXT: .LBB11_25: +; RV64I-NEXT: mv a3, a6 +; RV64I-NEXT: .LBB11_26: +; RV64I-NEXT: sb a3, 24(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: sb a4, 31(a2) +; RV64I-NEXT: srli a4, a3, 48 +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: srli a4, a3, 40 +; RV64I-NEXT: sb a4, 29(a2) +; RV64I-NEXT: srli a4, a3, 32 +; RV64I-NEXT: sb a4, 28(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: sb a4, 27(a2) +; RV64I-NEXT: srli a4, a3, 16 +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a3, 25(a2) +; RV64I-NEXT: sb a5, 16(a2) +; RV64I-NEXT: srli a3, a5, 56 +; RV64I-NEXT: sb a3, 23(a2) +; RV64I-NEXT: srli a3, a5, 48 +; RV64I-NEXT: sb a3, 22(a2) +; RV64I-NEXT: srli a3, a5, 40 +; RV64I-NEXT: sb a3, 21(a2) +; RV64I-NEXT: srli a3, a5, 32 +; RV64I-NEXT: sb a3, 20(a2) +; RV64I-NEXT: srli a3, a5, 24 +; RV64I-NEXT: sb a3, 19(a2) +; RV64I-NEXT: srli a3, a5, 16 +; RV64I-NEXT: sb a3, 18(a2) +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: sb a5, 17(a2) +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: sb a3, 6(a2) +; RV64I-NEXT: srli a3, a1, 40 +; RV64I-NEXT: sb a3, 5(a2) +; RV64I-NEXT: srli a3, a1, 32 +; RV64I-NEXT: sb a3, 4(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: sb a3, 3(a2) +; RV64I-NEXT: srli a3, a1, 16 +; RV64I-NEXT: sb a3, 2(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: sb a1, 15(a2) +; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: sb a1, 14(a2) +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: sb a1, 13(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: sb a1, 11(a2) +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: sb a1, 10(a2) +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a0, 9(a2) +; RV64I-NEXT: ld s0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV32I-LABEL: ashr_32bytes: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -128 +; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a7, 4(a0) +; RV32I-NEXT: lbu a5, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: lbu t0, 0(a0) +; RV32I-NEXT: lbu t4, 1(a0) +; RV32I-NEXT: lbu s9, 2(a0) +; RV32I-NEXT: lbu s0, 3(a0) +; RV32I-NEXT: lbu t1, 12(a0) +; RV32I-NEXT: lbu t6, 13(a0) +; RV32I-NEXT: lbu s3, 14(a0) +; RV32I-NEXT: lbu s5, 15(a0) +; RV32I-NEXT: lbu s1, 8(a0) +; RV32I-NEXT: lbu s2, 9(a0) +; RV32I-NEXT: lbu s6, 10(a0) +; RV32I-NEXT: lbu s7, 11(a0) +; RV32I-NEXT: lbu a3, 21(a0) +; RV32I-NEXT: lbu a4, 20(a0) +; RV32I-NEXT: lbu a6, 22(a0) +; RV32I-NEXT: lbu t5, 23(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli t5, t5, 24 +; RV32I-NEXT: or a4, t5, a6 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 17(a0) +; RV32I-NEXT: lbu a6, 16(a0) +; RV32I-NEXT: lbu t5, 18(a0) +; RV32I-NEXT: lbu s4, 19(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or s8, a4, a6 +; RV32I-NEXT: slli t5, t5, 16 +; RV32I-NEXT: slli s4, s4, 24 +; RV32I-NEXT: or a6, s4, t5 +; RV32I-NEXT: lbu a4, 29(a0) +; RV32I-NEXT: lbu t5, 28(a0) +; RV32I-NEXT: lbu s4, 30(a0) +; RV32I-NEXT: lbu s10, 31(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, t5 +; RV32I-NEXT: slli t5, s4, 16 +; RV32I-NEXT: slli s4, s10, 24 +; RV32I-NEXT: or t5, s4, t5 +; RV32I-NEXT: or a4, t5, a4 +; RV32I-NEXT: lbu t5, 25(a0) +; RV32I-NEXT: lbu s10, 24(a0) +; RV32I-NEXT: lbu s11, 26(a0) +; RV32I-NEXT: lbu a0, 27(a0) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t5, t5, s10 +; RV32I-NEXT: slli s11, s11, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, s11 +; RV32I-NEXT: or s11, a0, t5 +; RV32I-NEXT: lbu a0, 1(a1) +; RV32I-NEXT: lbu t5, 0(a1) +; RV32I-NEXT: lbu s10, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, t5 +; RV32I-NEXT: slli s10, s10, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, s10 +; RV32I-NEXT: or a1, a1, a0 +; RV32I-NEXT: addi t5, a1, -192 +; RV32I-NEXT: addi a0, a1, -224 +; RV32I-NEXT: slli s10, a4, 1 +; RV32I-NEXT: sw s11, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz a0, .LBB11_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sra a0, a4, a0 +; RV32I-NEXT: j .LBB11_3 +; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: srl a0, s11, t5 +; RV32I-NEXT: xori t5, t5, 31 +; RV32I-NEXT: sll t5, s10, t5 +; RV32I-NEXT: or a0, a0, t5 +; RV32I-NEXT: .LBB11_3: +; RV32I-NEXT: slli s10, t6, 8 +; RV32I-NEXT: slli s11, s3, 16 +; RV32I-NEXT: slli ra, s5, 24 +; RV32I-NEXT: or t5, a6, s8 +; RV32I-NEXT: addi s3, a1, -128 +; RV32I-NEXT: slli t6, a3, 1 +; RV32I-NEXT: addi s5, a1, -160 +; RV32I-NEXT: xori s8, s3, 31 +; RV32I-NEXT: sw t6, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s5, .LBB11_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: srl t6, a3, s5 +; RV32I-NEXT: j .LBB11_6 +; RV32I-NEXT: .LBB11_5: +; RV32I-NEXT: srl a6, t5, s3 +; RV32I-NEXT: sll t6, t6, s8 +; RV32I-NEXT: or t6, a6, t6 +; RV32I-NEXT: .LBB11_6: +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli s6, s6, 16 +; RV32I-NEXT: slli s7, s7, 24 +; RV32I-NEXT: or a6, s10, t1 +; RV32I-NEXT: or s8, ra, s11 +; RV32I-NEXT: neg ra, a1 +; RV32I-NEXT: lw t1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll s11, t1, ra +; RV32I-NEXT: li s10, 160 +; RV32I-NEXT: li t1, 64 +; RV32I-NEXT: sub s10, s10, a1 +; RV32I-NEXT: sw s11, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgeu s3, t1, .LBB11_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: slti a0, s10, 0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: and a0, a0, s11 +; RV32I-NEXT: or a0, t6, a0 +; RV32I-NEXT: .LBB11_8: +; RV32I-NEXT: slli t6, a5, 8 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t3, t3, 24 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: slli s9, s9, 16 +; RV32I-NEXT: slli s0, s0, 24 +; RV32I-NEXT: or s1, s2, s1 +; RV32I-NEXT: or s2, s7, s6 +; RV32I-NEXT: or a5, s8, a6 +; RV32I-NEXT: mv s7, t5 +; RV32I-NEXT: beqz s3, .LBB11_10 +; RV32I-NEXT: # %bb.9: +; RV32I-NEXT: mv s7, a0 +; RV32I-NEXT: .LBB11_10: +; RV32I-NEXT: or a0, t6, a7 +; RV32I-NEXT: or a7, t3, t2 +; RV32I-NEXT: or t0, t4, t0 +; RV32I-NEXT: or t2, s0, s9 +; RV32I-NEXT: or s1, s2, s1 +; RV32I-NEXT: addi t6, a1, -64 +; RV32I-NEXT: slli s8, a5, 1 +; RV32I-NEXT: addi s0, a1, -96 +; RV32I-NEXT: xori t3, t6, 31 +; RV32I-NEXT: sw t3, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s0, .LBB11_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: srl a6, a5, s0 +; RV32I-NEXT: j .LBB11_13 +; RV32I-NEXT: .LBB11_12: +; RV32I-NEXT: srl a6, s1, t6 +; RV32I-NEXT: sll t3, s8, t3 +; RV32I-NEXT: or a6, a6, t3 +; RV32I-NEXT: .LBB11_13: +; RV32I-NEXT: or s11, a7, a0 +; RV32I-NEXT: or t2, t2, t0 +; RV32I-NEXT: addi t4, a1, -32 +; RV32I-NEXT: xori s9, a1, 31 +; RV32I-NEXT: bltz t4, .LBB11_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: srl a7, s11, t4 +; RV32I-NEXT: j .LBB11_16 +; RV32I-NEXT: .LBB11_15: +; RV32I-NEXT: srl a0, t2, a1 +; RV32I-NEXT: slli a7, s11, 1 +; RV32I-NEXT: sll a7, a7, s9 +; RV32I-NEXT: or a7, a0, a7 +; RV32I-NEXT: .LBB11_16: +; RV32I-NEXT: sll t3, s1, ra +; RV32I-NEXT: li a0, 32 +; RV32I-NEXT: sub s6, a0, a1 +; RV32I-NEXT: slti t0, s6, 0 +; RV32I-NEXT: neg t0, t0 +; RV32I-NEXT: bgeu a1, t1, .LBB11_18 +; RV32I-NEXT: # %bb.17: +; RV32I-NEXT: and a6, t0, t3 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: .LBB11_18: +; RV32I-NEXT: sw s10, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t0, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, t2 +; RV32I-NEXT: beqz a1, .LBB11_20 +; RV32I-NEXT: # %bb.19: +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: .LBB11_20: +; RV32I-NEXT: sll a6, t5, ra +; RV32I-NEXT: li a7, 96 +; RV32I-NEXT: sub s10, a7, a1 +; RV32I-NEXT: slti a7, s10, 0 +; RV32I-NEXT: neg a7, a7 +; RV32I-NEXT: li s0, 128 +; RV32I-NEXT: sub s2, s0, a1 +; RV32I-NEXT: sltiu t6, s2, 64 +; RV32I-NEXT: neg t6, t6 +; RV32I-NEXT: sw t6, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgeu a1, s0, .LBB11_22 +; RV32I-NEXT: # %bb.21: +; RV32I-NEXT: mv s0, t6 +; RV32I-NEXT: and t6, a7, a6 +; RV32I-NEXT: and t6, s0, t6 +; RV32I-NEXT: or s7, t0, t6 +; RV32I-NEXT: .LBB11_22: +; RV32I-NEXT: beqz a1, .LBB11_24 +; RV32I-NEXT: # %bb.23: +; RV32I-NEXT: mv t2, s7 +; RV32I-NEXT: .LBB11_24: +; RV32I-NEXT: neg t0, s2 +; RV32I-NEXT: sub t6, a0, s2 +; RV32I-NEXT: srl a0, a3, t0 +; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: bgez t6, .LBB11_26 +; RV32I-NEXT: # %bb.25: +; RV32I-NEXT: srl a0, t5, t0 +; RV32I-NEXT: sub t0, t1, s2 +; RV32I-NEXT: xori t0, t0, 31 +; RV32I-NEXT: lw t6, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t0, t6, t0 +; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: .LBB11_26: +; RV32I-NEXT: lw s7, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltu s2, t1, .LBB11_28 +; RV32I-NEXT: # %bb.27: +; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a7, a0, a6 +; RV32I-NEXT: mv a0, s7 +; RV32I-NEXT: bnez s2, .LBB11_29 +; RV32I-NEXT: j .LBB11_30 +; RV32I-NEXT: .LBB11_28: +; RV32I-NEXT: lw t0, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a7, a7, t0 +; RV32I-NEXT: or a7, a7, a0 +; RV32I-NEXT: mv a0, s7 +; RV32I-NEXT: beqz s2, .LBB11_30 +; RV32I-NEXT: .LBB11_29: +; RV32I-NEXT: mv a0, a7 +; RV32I-NEXT: .LBB11_30: +; RV32I-NEXT: bltz t4, .LBB11_32 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: srl a7, a5, t4 +; RV32I-NEXT: j .LBB11_33 +; RV32I-NEXT: .LBB11_32: +; RV32I-NEXT: srl a7, s1, a1 +; RV32I-NEXT: sll t0, s8, s9 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: .LBB11_33: +; RV32I-NEXT: li s8, 128 +; RV32I-NEXT: sw s9, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltz s5, .LBB11_35 +; RV32I-NEXT: # %bb.34: +; RV32I-NEXT: sra t0, a4, s5 +; RV32I-NEXT: j .LBB11_36 +; RV32I-NEXT: .LBB11_35: +; RV32I-NEXT: srl t0, s7, s3 +; RV32I-NEXT: lw t6, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll t6, t6, s9 +; RV32I-NEXT: or t0, t0, t6 +; RV32I-NEXT: .LBB11_36: +; RV32I-NEXT: sltiu t6, a1, 64 +; RV32I-NEXT: srai s9, s4, 31 +; RV32I-NEXT: bgeu s3, t1, .LBB11_44 +; RV32I-NEXT: # %bb.37: +; RV32I-NEXT: neg s0, t6 +; RV32I-NEXT: bltu a1, s8, .LBB11_45 +; RV32I-NEXT: .LBB11_38: +; RV32I-NEXT: mv s4, s1 +; RV32I-NEXT: beqz a1, .LBB11_40 +; RV32I-NEXT: .LBB11_39: +; RV32I-NEXT: mv s4, t0 +; RV32I-NEXT: .LBB11_40: +; RV32I-NEXT: sub a0, t1, a1 +; RV32I-NEXT: xori t0, a0, 31 +; RV32I-NEXT: bgez s6, .LBB11_42 +; RV32I-NEXT: # %bb.41: +; RV32I-NEXT: sll a0, a5, ra +; RV32I-NEXT: srli s1, s1, 1 +; RV32I-NEXT: srl a7, s1, t0 +; RV32I-NEXT: or t3, a0, a7 +; RV32I-NEXT: .LBB11_42: +; RV32I-NEXT: slti a0, t4, 0 +; RV32I-NEXT: neg a7, a0 +; RV32I-NEXT: sw a7, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: bltu a1, t1, .LBB11_46 +; RV32I-NEXT: # %bb.43: +; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a0, a5, a0 +; RV32I-NEXT: lw a7, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: slti a7, a7, 0 +; RV32I-NEXT: neg a7, a7 +; RV32I-NEXT: and a0, a7, a0 +; RV32I-NEXT: j .LBB11_47 +; RV32I-NEXT: .LBB11_44: +; RV32I-NEXT: mv t0, s9 +; RV32I-NEXT: neg s0, t6 +; RV32I-NEXT: bgeu a1, s8, .LBB11_38 +; RV32I-NEXT: .LBB11_45: +; RV32I-NEXT: and a7, s0, a7 +; RV32I-NEXT: or t0, a7, a0 +; RV32I-NEXT: mv s4, s1 +; RV32I-NEXT: bnez a1, .LBB11_39 +; RV32I-NEXT: j .LBB11_40 +; RV32I-NEXT: .LBB11_46: +; RV32I-NEXT: srl a0, s11, a1 +; RV32I-NEXT: and a0, a7, a0 +; RV32I-NEXT: or a0, a0, t3 +; RV32I-NEXT: .LBB11_47: +; RV32I-NEXT: sw t0, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, s11 +; RV32I-NEXT: beqz a1, .LBB11_49 +; RV32I-NEXT: # %bb.48: +; RV32I-NEXT: mv t0, a0 +; RV32I-NEXT: .LBB11_49: +; RV32I-NEXT: sll t6, a3, ra +; RV32I-NEXT: srli a0, t5, 1 +; RV32I-NEXT: xori t3, s2, 31 +; RV32I-NEXT: bltz s10, .LBB11_51 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: mv a7, a6 +; RV32I-NEXT: j .LBB11_52 +; RV32I-NEXT: .LBB11_51: +; RV32I-NEXT: srl a7, a0, t3 +; RV32I-NEXT: or a7, t6, a7 +; RV32I-NEXT: .LBB11_52: +; RV32I-NEXT: sll ra, a4, ra +; RV32I-NEXT: srli s1, s7, 1 +; RV32I-NEXT: lw s7, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz s7, .LBB11_55 +; RV32I-NEXT: # %bb.53: +; RV32I-NEXT: lw s7, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgez s8, .LBB11_56 +; RV32I-NEXT: .LBB11_54: +; RV32I-NEXT: lw s8, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sra s8, a4, s8 +; RV32I-NEXT: bltu s3, t1, .LBB11_57 +; RV32I-NEXT: j .LBB11_58 +; RV32I-NEXT: .LBB11_55: +; RV32I-NEXT: li s7, 192 +; RV32I-NEXT: sub s7, s7, a1 +; RV32I-NEXT: xori s7, s7, 31 +; RV32I-NEXT: srl s7, s1, s7 +; RV32I-NEXT: or s7, ra, s7 +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz s8, .LBB11_54 +; RV32I-NEXT: .LBB11_56: +; RV32I-NEXT: mv s8, s9 +; RV32I-NEXT: bgeu s3, t1, .LBB11_58 +; RV32I-NEXT: .LBB11_57: +; RV32I-NEXT: slti s8, s5, 0 +; RV32I-NEXT: mv t1, t2 +; RV32I-NEXT: mv t2, s6 +; RV32I-NEXT: mv s6, s1 +; RV32I-NEXT: mv s1, ra +; RV32I-NEXT: srl ra, a3, s3 +; RV32I-NEXT: neg s8, s8 +; RV32I-NEXT: and s8, s8, ra +; RV32I-NEXT: mv ra, s1 +; RV32I-NEXT: mv s1, s6 +; RV32I-NEXT: mv s6, t2 +; RV32I-NEXT: mv t2, t1 +; RV32I-NEXT: li t1, 64 +; RV32I-NEXT: or s8, s8, s7 +; RV32I-NEXT: .LBB11_58: +; RV32I-NEXT: mv s7, a3 +; RV32I-NEXT: bnez s3, .LBB11_65 +; RV32I-NEXT: # %bb.59: +; RV32I-NEXT: li s8, 128 +; RV32I-NEXT: bltu a1, s8, .LBB11_66 +; RV32I-NEXT: .LBB11_60: +; RV32I-NEXT: lw a7, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez a1, .LBB11_67 +; RV32I-NEXT: .LBB11_61: +; RV32I-NEXT: bgez s6, .LBB11_63 +; RV32I-NEXT: .LBB11_62: +; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a0, a0, a6 +; RV32I-NEXT: or a6, t6, a0 +; RV32I-NEXT: .LBB11_63: +; RV32I-NEXT: lw t0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t6, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltz s10, .LBB11_68 +; RV32I-NEXT: # %bb.64: +; RV32I-NEXT: mv a0, t6 +; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltu s2, t1, .LBB11_69 +; RV32I-NEXT: j .LBB11_70 +; RV32I-NEXT: .LBB11_65: +; RV32I-NEXT: mv s7, s8 +; RV32I-NEXT: li s8, 128 +; RV32I-NEXT: bgeu a1, s8, .LBB11_60 +; RV32I-NEXT: .LBB11_66: +; RV32I-NEXT: lw s7, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a7, s7, a7 +; RV32I-NEXT: or s7, t0, a7 +; RV32I-NEXT: lw a7, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: beqz a1, .LBB11_61 +; RV32I-NEXT: .LBB11_67: +; RV32I-NEXT: mv s11, s7 +; RV32I-NEXT: bltz s6, .LBB11_62 +; RV32I-NEXT: j .LBB11_63 +; RV32I-NEXT: .LBB11_68: +; RV32I-NEXT: srl a0, s1, t3 +; RV32I-NEXT: or a0, ra, a0 +; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgeu s2, t1, .LBB11_70 +; RV32I-NEXT: .LBB11_69: +; RV32I-NEXT: lw a6, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: slti a6, a6, 0 +; RV32I-NEXT: neg a6, a6 +; RV32I-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a6, a6, s7 +; RV32I-NEXT: or a6, a0, a6 +; RV32I-NEXT: .LBB11_70: +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: bnez s2, .LBB11_73 +; RV32I-NEXT: # %bb.71: +; RV32I-NEXT: bltz s5, .LBB11_74 +; RV32I-NEXT: .LBB11_72: +; RV32I-NEXT: mv a6, s9 +; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: bgeu s3, t1, .LBB11_75 +; RV32I-NEXT: j .LBB11_76 +; RV32I-NEXT: .LBB11_73: +; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: bgez s5, .LBB11_72 +; RV32I-NEXT: .LBB11_74: +; RV32I-NEXT: sra a6, a4, s3 +; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: bltu s3, t1, .LBB11_76 +; RV32I-NEXT: .LBB11_75: +; RV32I-NEXT: mv a6, s9 +; RV32I-NEXT: .LBB11_76: +; RV32I-NEXT: bltu a1, s8, .LBB11_81 +; RV32I-NEXT: # %bb.77: +; RV32I-NEXT: bnez a1, .LBB11_82 +; RV32I-NEXT: .LBB11_78: +; RV32I-NEXT: bltz s2, .LBB11_83 +; RV32I-NEXT: .LBB11_79: +; RV32I-NEXT: sra a0, a4, s2 +; RV32I-NEXT: bgez t4, .LBB11_84 +; RV32I-NEXT: .LBB11_80: +; RV32I-NEXT: srl a6, t5, a1 +; RV32I-NEXT: lw s0, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a7, a7, s0 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: bltu a1, t1, .LBB11_85 +; RV32I-NEXT: j .LBB11_86 +; RV32I-NEXT: .LBB11_81: +; RV32I-NEXT: srl a6, a5, a1 +; RV32I-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a6, s3, a6 +; RV32I-NEXT: and a6, s0, a6 +; RV32I-NEXT: or a6, a6, a0 +; RV32I-NEXT: beqz a1, .LBB11_78 +; RV32I-NEXT: .LBB11_82: +; RV32I-NEXT: mv a5, a6 +; RV32I-NEXT: bgez s2, .LBB11_79 +; RV32I-NEXT: .LBB11_83: +; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a0, t0, a0 +; RV32I-NEXT: lw a6, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a6, t3, a6 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: bltz t4, .LBB11_80 +; RV32I-NEXT: .LBB11_84: +; RV32I-NEXT: srl a6, a3, t4 +; RV32I-NEXT: bgeu a1, t1, .LBB11_86 +; RV32I-NEXT: .LBB11_85: +; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a0, a0, t6 +; RV32I-NEXT: or a0, a6, a0 +; RV32I-NEXT: .LBB11_86: +; RV32I-NEXT: bnez a1, .LBB11_91 +; RV32I-NEXT: # %bb.87: +; RV32I-NEXT: bgeu a1, s8, .LBB11_92 +; RV32I-NEXT: .LBB11_88: +; RV32I-NEXT: bltz s6, .LBB11_93 +; RV32I-NEXT: .LBB11_89: +; RV32I-NEXT: bltz s2, .LBB11_94 +; RV32I-NEXT: .LBB11_90: +; RV32I-NEXT: mv a0, s9 +; RV32I-NEXT: bltu a1, t1, .LBB11_95 +; RV32I-NEXT: j .LBB11_96 +; RV32I-NEXT: .LBB11_91: +; RV32I-NEXT: mv t5, a0 +; RV32I-NEXT: bltu a1, s8, .LBB11_88 +; RV32I-NEXT: .LBB11_92: +; RV32I-NEXT: mv t5, s9 +; RV32I-NEXT: bgez s6, .LBB11_89 +; RV32I-NEXT: .LBB11_93: +; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: srl a0, s1, a0 +; RV32I-NEXT: or t6, ra, a0 +; RV32I-NEXT: bgez s2, .LBB11_90 +; RV32I-NEXT: .LBB11_94: +; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: sra a0, a4, a0 +; RV32I-NEXT: bgeu a1, t1, .LBB11_96 +; RV32I-NEXT: .LBB11_95: +; RV32I-NEXT: srl a0, a3, a1 +; RV32I-NEXT: lw a6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: and a0, a6, a0 +; RV32I-NEXT: or a0, a0, t6 +; RV32I-NEXT: .LBB11_96: +; RV32I-NEXT: bnez a1, .LBB11_100 +; RV32I-NEXT: # %bb.97: +; RV32I-NEXT: bgeu a1, s8, .LBB11_101 +; RV32I-NEXT: .LBB11_98: +; RV32I-NEXT: bltz t4, .LBB11_102 +; RV32I-NEXT: .LBB11_99: +; RV32I-NEXT: sra a0, a4, t4 +; RV32I-NEXT: bgeu a1, t1, .LBB11_103 +; RV32I-NEXT: j .LBB11_104 +; RV32I-NEXT: .LBB11_100: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: bltu a1, s8, .LBB11_98 +; RV32I-NEXT: .LBB11_101: +; RV32I-NEXT: mv a3, s9 +; RV32I-NEXT: bgez t4, .LBB11_99 +; RV32I-NEXT: .LBB11_102: +; RV32I-NEXT: srl a0, t0, a1 +; RV32I-NEXT: lw a6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: sll a6, t3, a6 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: bltu a1, t1, .LBB11_104 +; RV32I-NEXT: .LBB11_103: +; RV32I-NEXT: mv a0, s9 +; RV32I-NEXT: .LBB11_104: +; RV32I-NEXT: bgeu a1, s8, .LBB11_107 +; RV32I-NEXT: # %bb.105: +; RV32I-NEXT: bltz t4, .LBB11_108 +; RV32I-NEXT: .LBB11_106: +; RV32I-NEXT: mv a4, s9 +; RV32I-NEXT: bgeu a1, t1, .LBB11_109 +; RV32I-NEXT: j .LBB11_110 +; RV32I-NEXT: .LBB11_107: +; RV32I-NEXT: mv a0, s9 +; RV32I-NEXT: bgez t4, .LBB11_106 +; RV32I-NEXT: .LBB11_108: +; RV32I-NEXT: sra a4, a4, a1 +; RV32I-NEXT: bltu a1, t1, .LBB11_110 +; RV32I-NEXT: .LBB11_109: +; RV32I-NEXT: mv a4, s9 +; RV32I-NEXT: .LBB11_110: +; RV32I-NEXT: bltu a1, s8, .LBB11_112 +; RV32I-NEXT: # %bb.111: +; RV32I-NEXT: mv a4, s9 +; RV32I-NEXT: .LBB11_112: +; RV32I-NEXT: sb a4, 28(a2) +; RV32I-NEXT: srli a1, a4, 24 +; RV32I-NEXT: sb a1, 31(a2) +; RV32I-NEXT: srli a1, a4, 16 +; RV32I-NEXT: sb a1, 30(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 29(a2) +; RV32I-NEXT: sb a0, 24(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 27(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: sb a1, 26(a2) +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb a0, 25(a2) +; RV32I-NEXT: sb t5, 16(a2) +; RV32I-NEXT: srli a0, t5, 24 +; RV32I-NEXT: sb a0, 19(a2) +; RV32I-NEXT: srli a0, t5, 16 +; RV32I-NEXT: sb a0, 18(a2) +; RV32I-NEXT: srli a0, t5, 8 +; RV32I-NEXT: sb a0, 17(a2) +; RV32I-NEXT: sb a3, 20(a2) +; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: sb a0, 23(a2) +; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: sb a0, 22(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 21(a2) +; RV32I-NEXT: sb t2, 0(a2) +; RV32I-NEXT: sb a5, 12(a2) +; RV32I-NEXT: srli a0, t2, 24 +; RV32I-NEXT: sb a0, 3(a2) +; RV32I-NEXT: srli a0, t2, 16 +; RV32I-NEXT: sb a0, 2(a2) +; RV32I-NEXT: srli a0, t2, 8 +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb s11, 4(a2) +; RV32I-NEXT: sb s4, 8(a2) +; RV32I-NEXT: srli a0, a5, 24 +; RV32I-NEXT: sb a0, 15(a2) +; RV32I-NEXT: srli a0, a5, 16 +; RV32I-NEXT: sb a0, 14(a2) +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a5, 13(a2) +; RV32I-NEXT: srli a0, s11, 24 +; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: srli a0, s11, 16 +; RV32I-NEXT: sb a0, 6(a2) +; RV32I-NEXT: srli a0, s11, 8 +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: srli a0, s4, 24 +; RV32I-NEXT: sb a0, 11(a2) +; RV32I-NEXT: srli a0, s4, 16 +; RV32I-NEXT: sb a0, 10(a2) +; RV32I-NEXT: srli a0, s4, 8 +; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: ret + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = ashr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ALL: {{.*}} diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll new file mode 100644 index 0000000..c06cd5b --- /dev/null +++ b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll @@ -0,0 +1,7825 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-NO-SHLD,X64-NO-BMI2-NO-SHLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-SHLD,X64-NO-BMI2-HAVE-SHLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-NO-SHLD,X64-HAVE-BMI2-NO-SHLD +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-SHLD,X64-HAVE-BMI2-HAVE-SHLD +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-NO-SHLD,X32-NO-BMI2-NO-SHLD +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-SHLD,X32-NO-BMI2-HAVE-SHLD +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-BMI2,X32-NO-SHLD,X32-HAVE-BMI2-NO-SHLD +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-BMI2,X32-SHLD,X32-HAVE-BMI2-HAVE-SHLD + +define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-LABEL: lshr_4bytes: +; X64-NO-BMI2: # %bb.0: +; X64-NO-BMI2-NEXT: movl (%rdi), %eax +; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-NEXT: shrl %cl, %eax +; X64-NO-BMI2-NEXT: movl %eax, (%rdx) +; X64-NO-BMI2-NEXT: retq +; +; X64-BMI2-LABEL: lshr_4bytes: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movzbl (%rsi), %eax +; X64-BMI2-NEXT: shrxl %eax, (%rdi), %eax +; X64-BMI2-NEXT: movl %eax, (%rdx) +; X64-BMI2-NEXT: retq +; +; X32-NO-BMI2-LABEL: lshr_4bytes: +; X32-NO-BMI2: # %bb.0: +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NEXT: movl (%edx), %edx +; X32-NO-BMI2-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-NEXT: retl +; +; X32-BMI2-LABEL: lshr_4bytes: +; X32-BMI2: # %bb.0: +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-BMI2-NEXT: movzbl (%edx), %edx +; X32-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X32-BMI2-NEXT: movl %ecx, (%eax) +; X32-BMI2-NEXT: retl + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = lshr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-LABEL: shl_4bytes: +; X64-NO-BMI2: # %bb.0: +; X64-NO-BMI2-NEXT: movl (%rdi), %eax +; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-NEXT: shll %cl, %eax +; X64-NO-BMI2-NEXT: movl %eax, (%rdx) +; X64-NO-BMI2-NEXT: retq +; +; X64-BMI2-LABEL: shl_4bytes: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movzbl (%rsi), %eax +; X64-BMI2-NEXT: shlxl %eax, (%rdi), %eax +; X64-BMI2-NEXT: movl %eax, (%rdx) +; X64-BMI2-NEXT: retq +; +; X32-NO-BMI2-LABEL: shl_4bytes: +; X32-NO-BMI2: # %bb.0: +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NEXT: movl (%edx), %edx +; X32-NO-BMI2-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-NEXT: shll %cl, %edx +; X32-NO-BMI2-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-NEXT: retl +; +; X32-BMI2-LABEL: shl_4bytes: +; X32-BMI2: # %bb.0: +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-BMI2-NEXT: movzbl (%edx), %edx +; X32-BMI2-NEXT: shlxl %edx, (%ecx), %ecx +; X32-BMI2-NEXT: movl %ecx, (%eax) +; X32-BMI2-NEXT: retl + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = shl i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} +define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-LABEL: ashr_4bytes: +; X64-NO-BMI2: # %bb.0: +; X64-NO-BMI2-NEXT: movl (%rdi), %eax +; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-NEXT: sarl %cl, %eax +; X64-NO-BMI2-NEXT: movl %eax, (%rdx) +; X64-NO-BMI2-NEXT: retq +; +; X64-BMI2-LABEL: ashr_4bytes: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movzbl (%rsi), %eax +; X64-BMI2-NEXT: sarxl %eax, (%rdi), %eax +; X64-BMI2-NEXT: movl %eax, (%rdx) +; X64-BMI2-NEXT: retq +; +; X32-NO-BMI2-LABEL: ashr_4bytes: +; X32-NO-BMI2: # %bb.0: +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NEXT: movl (%edx), %edx +; X32-NO-BMI2-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-NEXT: sarl %cl, %edx +; X32-NO-BMI2-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-NEXT: retl +; +; X32-BMI2-LABEL: ashr_4bytes: +; X32-BMI2: # %bb.0: +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-BMI2-NEXT: movzbl (%edx), %edx +; X32-BMI2-NEXT: sarxl %edx, (%ecx), %ecx +; X32-BMI2-NEXT: movl %ecx, (%eax) +; X32-BMI2-NEXT: retl + %src = load i32, ptr %src.ptr, align 1 + %bitOff = load i32, ptr %bitOff.ptr, align 1 + %res = ashr i32 %src, %bitOff + store i32 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-LABEL: lshr_8bytes: +; X64-NO-BMI2: # %bb.0: +; X64-NO-BMI2-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-NEXT: shrq %cl, %rax +; X64-NO-BMI2-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-NEXT: retq +; +; X64-BMI2-LABEL: lshr_8bytes: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movzbl (%rsi), %eax +; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rax +; X64-BMI2-NEXT: movq %rax, (%rdx) +; X64-BMI2-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: lshr_8bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_8bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_8bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%edx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_8bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = lshr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-LABEL: shl_8bytes: +; X64-NO-BMI2: # %bb.0: +; X64-NO-BMI2-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-NEXT: shlq %cl, %rax +; X64-NO-BMI2-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-NEXT: retq +; +; X64-BMI2-LABEL: shl_8bytes: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movzbl (%rsi), %eax +; X64-BMI2-NEXT: shlxq %rax, (%rdi), %rax +; X64-BMI2-NEXT: movq %rax, (%rdx) +; X64-BMI2-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: shl_8bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_8bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_8bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, 4(%edx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_8bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = shl i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} +define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-LABEL: ashr_8bytes: +; X64-NO-BMI2: # %bb.0: +; X64-NO-BMI2-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-NEXT: sarq %cl, %rax +; X64-NO-BMI2-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-NEXT: retq +; +; X64-BMI2-LABEL: ashr_8bytes: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movzbl (%rsi), %eax +; X64-BMI2-NEXT: sarxq %rax, (%rdi), %rax +; X64-BMI2-NEXT: movq %rax, (%rdx) +; X64-BMI2-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: ashr_8bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_8bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_8bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%edx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esi), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %edx, %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_8bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i64, ptr %src.ptr, align 1 + %bitOff = load i64, ptr %bitOff.ptr, align 1 + %res = ashr i64 %src, %bitOff + store i64 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rcx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, 8(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rcx, (%rdi), %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rsi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rcx, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%edi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $32, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $32, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = lshr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: shl_16bytes: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rcx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, 8(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rcx, 8(%rdi), %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rcx, %rax, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rdi, %rax, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rsi, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rax, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: shl_16bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: decb %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%eax), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 8(%ecx) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $36, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $36, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = shl i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} +define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: sarq $63, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rsi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rcx, (%rdi), %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rsi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %rcx, %rax, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rsi, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %rdi, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%esi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %edx, %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i128, ptr %src.ptr, align 1 + %bitOff = load i128, ptr %bitOff.ptr, align 1 + %res = ashr i128 %src, %bitOff + store i128 %res, ptr %dst, align 1 + ret void +} + +define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r13, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r9d +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r10, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rax, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r8, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r13d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r12, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rbx, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r9b, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rbx, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r9), %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r12d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r12, %r14, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %rbx, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r10, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r9b, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, 24(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %r11, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r11, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $140, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%edx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebp), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %bl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: notb %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: subb %ah, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: decb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %al # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $140, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $120, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ebp), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ebp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%eax), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $120, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $152, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%ebx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ebx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 24(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 16(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 20(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $152, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $120, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edi), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edi), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $120, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = lshr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r14, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdx,%rdx), %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbx, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, (%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 8(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 24(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 16(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbp, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r12, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, 16(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %edi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r9, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %rax, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %r13d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r12, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r10, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r14, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %dil, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r15, %rbp, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r9, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r10, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rdi), %ebx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %r12d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r14, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r10, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r9, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %bl +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %dil, %dil +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 24(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 16(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r8, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r10, %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbp, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r12, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r10, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbx, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 16(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $136, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: notb %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %bh # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: decb %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: negb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%eax), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %bh # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %bh, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 28(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 24(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 16(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 20(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $136, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $116, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%ecx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ebx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%ebx), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%eax), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%eax), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%eax), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 28(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 16(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 20(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $116, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $156, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb (%esp), %bl # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%eax), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb (%esp), %dl # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 24(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 16(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 20(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $156, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $124, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ebp), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ebp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%ebp), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ebp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%eax), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb (%esp), %cl # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 24(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $124, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = shl i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { +; X64-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: +; X64-NO-BMI2-NO-SHLD: # %bb.0: +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %edx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: sarq $63, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r13, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r12,%r12), %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl +; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: subb %dl, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rdx), %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r12, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r10) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r10) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r10) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r10) +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: retq +; +; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: +; X64-NO-BMI2-HAVE-SHLD: # %bb.0: +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: +; X64-HAVE-BMI2-NO-SHLD: # %bb.0: +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r9d +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r9, %r10, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r12, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rbx, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %ebp +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bpl +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r13, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r14, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r15, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rax, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r9b, %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %r12d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rbp, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r10, %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r14, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r13, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r9), %r11d +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r13d +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r15, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r14, %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r11, %r10, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r9b, %r9b +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: retq +; +; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: +; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r12d, %r12d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rax, %r11, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r13, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %r11, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbp, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq +; +; X32-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: +; X32-NO-BMI2-NO-SHLD: # %bb.0: +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: subl $144, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebp), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: notb %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%ebp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ebp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bh +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: subb %ah, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: decb %al +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $144, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: retl +; +; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: +; X32-NO-BMI2-HAVE-SHLD: # %bb.0: +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $116, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ebx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ebx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%ebx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ebx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ebx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%ebx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $116, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: +; X32-HAVE-BMI2-NO-SHLD: # %bb.0: +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $156, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 24(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 16(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 20(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $156, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: retl +; +; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: +; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $132, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %edx, %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %esi, %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $132, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl + %src = load i256, ptr %src.ptr, align 1 + %bitOff = load i256, ptr %bitOff.ptr, align 1 + %res = ashr i256 %src, %bitOff + store i256 %res, ptr %dst, align 1 + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ALL: {{.*}} +; X32: {{.*}} +; X32-NO-SHLD: {{.*}} +; X32-SHLD: {{.*}} +; X64: {{.*}} +; X64-NO-SHLD: {{.*}} +; X64-SHLD: {{.*}} -- 2.7.4