From e5039aad45740a3017d774673867eccbbe6b0ce0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Dec 2020 14:42:08 +0000 Subject: [PATCH] [X86] Regenerate bit extraction tests, cleaning up check-prefixes. As noticed on D92965, we needed to simplify the prefixes to ensure all RUNs were properly covered. We should never have a target with BMI2 without BMI1, so use that as the 'BMI level' and then check with/without TBM (all TBM targets have at least BMI1). --- llvm/test/CodeGen/X86/extract-bits.ll | 9537 +++++++++++++++--------------- llvm/test/CodeGen/X86/extract-lowbits.ll | 4740 +++++++-------- 2 files changed, 7092 insertions(+), 7185 deletions(-) diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll index c128a00..4083074 100644 --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI2 ; *Please* keep in sync with test/CodeGen/AArch64/extract-bits.ll @@ -46,22 +46,22 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a0: ; X64-NOBMI: # %bb.0: @@ -75,19 +75,19 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -111,23 +111,23 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a0_arithmetic: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: sarl %cl, %edx -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a0_arithmetic: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: sarxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a0_arithmetic: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: sarl %cl, %edx +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a0_arithmetic: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: sarxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a0_arithmetic: ; X64-NOBMI: # %bb.0: @@ -141,20 +141,20 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a0_arithmetic: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: sarl %cl, %edi -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: bextrl %edx, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a0_arithmetic: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: sarxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a0_arithmetic: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: sarl %cl, %edi +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: bextrl %edx, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a0_arithmetic: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: sarxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = ashr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -178,22 +178,22 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -207,19 +207,19 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %conv = zext i8 %numlowbits to i32 @@ -246,24 +246,24 @@ define i32 @bextr32_a2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a2_load: ; X64-NOBMI: # %bb.0: @@ -278,19 +278,19 @@ define i32 @bextr32_a2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits @@ -316,24 +316,24 @@ define i32 @bextr32_a3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -348,19 +348,19 @@ define i32 @bextr32_a3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe ; X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -387,22 +387,22 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a4_commutative: ; X64-NOBMI: # %bb.0: @@ -416,19 +416,19 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -460,37 +460,37 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl %al, %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %eax, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %esi -; X86-BMI1BMI2-NEXT: movl %ecx, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl %al, %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %eax, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: bzhil %eax, %edx, %esi +; X86-BMI2-NEXT: movl %ecx, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -508,29 +508,29 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %ebx -; X64-BMI1NOTBM-NEXT: movl %esi, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %ebx +; X64-BMI1-NEXT: movl %esi, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %ebx +; X64-BMI2-NEXT: movl %esi, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -578,78 +578,78 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB7_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB7_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB7_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB7_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB7_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB7_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB7_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB7_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB7_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB7_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB7_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB7_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB7_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB7_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB7_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB7_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a0: ; X64-NOBMI: # %bb.0: @@ -663,19 +663,19 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -721,80 +721,80 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a0_arithmetic: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: sarl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB8_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: sarl $31, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB8_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB8_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB8_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %edi, %eax -; X86-BMI1NOTBM-NEXT: andl %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a0_arithmetic: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: sarxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB8_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: sarl $31, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl %eax, %edi -; X86-BMI1BMI2-NEXT: .LBB8_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB8_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB8_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a0_arithmetic: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: sarl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %eax, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB8_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: sarl $31, %eax +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB8_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB8_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB8_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %edi, %eax +; X86-BMI1-NEXT: andl %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a0_arithmetic: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: sarxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB8_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: sarl $31, %eax +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: .LBB8_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB8_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB8_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a0_arithmetic: ; X64-NOBMI: # %bb.0: @@ -808,20 +808,20 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a0_arithmetic: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: sarq %cl, %rdi -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: bextrq %rdx, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a0_arithmetic: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: sarxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a0_arithmetic: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: sarq %cl, %rdi +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: bextrq %rdx, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a0_arithmetic: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: sarxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = ashr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -866,78 +866,78 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB9_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB9_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB9_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB9_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB9_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB9_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB9_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB9_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB9_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB9_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB9_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB9_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB9_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB9_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB9_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB9_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -951,21 +951,21 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %conv = zext i8 %numlowbits to i64 @@ -1013,80 +1013,80 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB10_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB10_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB10_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB10_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB10_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB10_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB10_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB10_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB10_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB10_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB10_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB10_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB10_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB10_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB10_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB10_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a2_load: ; X64-NOBMI: # %bb.0: @@ -1101,19 +1101,19 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits @@ -1160,80 +1160,80 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB11_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB11_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB11_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB11_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB11_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB11_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB11_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB11_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB11_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB11_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB11_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB11_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB11_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB11_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB11_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB11_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -1248,21 +1248,21 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -1310,78 +1310,78 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB12_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB12_2: -; X86-BMI1NOTBM-NEXT: movl $1, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB12_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB12_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %esi -; X86-BMI1NOTBM-NEXT: adcl $-1, %edi -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB12_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB12_2: -; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %edi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB12_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB12_4: -; X86-BMI1BMI2-NEXT: addl $-1, %ecx -; X86-BMI1BMI2-NEXT: adcl $-1, %esi -; X86-BMI1BMI2-NEXT: andl %ecx, %eax -; X86-BMI1BMI2-NEXT: andl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB12_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB12_2: +; X86-BMI1-NEXT: movl $1, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %esi, %edi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB12_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB12_4: +; X86-BMI1-NEXT: addl $-1, %esi +; X86-BMI1-NEXT: adcl $-1, %edi +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB12_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB12_2: +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ebx, %edi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB12_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB12_4: +; X86-BMI2-NEXT: addl $-1, %ecx +; X86-BMI2-NEXT: adcl $-1, %esi +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a4_commutative: ; X64-NOBMI: # %bb.0: @@ -1395,19 +1395,19 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -1466,104 +1466,104 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %ebp -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB13_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp -; X86-BMI1NOTBM-NEXT: .LBB13_2: -; X86-BMI1NOTBM-NEXT: movl $1, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: je .LBB13_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB13_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %esi -; X86-BMI1NOTBM-NEXT: adcl $-1, %edi -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: andl %ebp, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %ebx -; X86-BMI1BMI2-NEXT: shrxl %eax, %esi, %ebp -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB13_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp -; X86-BMI1BMI2-NEXT: .LBB13_2: -; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi -; X86-BMI1BMI2-NEXT: shlxl %edx, %edi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB13_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB13_4: -; X86-BMI1BMI2-NEXT: addl $-1, %edi -; X86-BMI1BMI2-NEXT: adcl $-1, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %esi, %ebp +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shrl %cl, %ebp +; X86-BMI1-NEXT: shrdl %cl, %esi, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB13_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %ebp, %ebx +; X86-BMI1-NEXT: xorl %ebp, %ebp +; X86-BMI1-NEXT: .LBB13_2: +; X86-BMI1-NEXT: movl $1, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shldl %cl, %esi, %edi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: je .LBB13_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB13_4: +; X86-BMI1-NEXT: addl $-1, %esi +; X86-BMI1-NEXT: adcl $-1, %edi +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: andl %ebp, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: shrdl %cl, %esi, %ebx +; X86-BMI2-NEXT: shrxl %eax, %esi, %ebp +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB13_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB13_2: +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %edx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB13_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB13_4: +; X86-BMI2-NEXT: addl $-1, %edi +; X86-BMI2-NEXT: adcl $-1, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -1581,29 +1581,29 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rbx -; X64-BMI1BMI2-NEXT: movq %rsi, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rbx +; X64-BMI1-NEXT: movq %rsi, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rbx +; X64-BMI2-NEXT: movq %rsi, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -1647,61 +1647,61 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB14_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB14_2: -; X86-BMI1NOTBM-NEXT: movl $1, %edi -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: jne .LBB14_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: .LBB14_4: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB14_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB14_2: -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: jne .LBB14_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB14_4: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB14_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB14_2: +; X86-BMI1-NEXT: movl $1, %edi +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: jne .LBB14_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: .LBB14_4: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB14_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB14_2: +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB14_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: .LBB14_4: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a0: ; X64-NOBMI: # %bb.0: @@ -1716,20 +1716,20 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -1765,44 +1765,44 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB15_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB15_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB15_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB15_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB15_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB15_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB15_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB15_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a1: ; X64-NOBMI: # %bb.0: @@ -1816,20 +1816,20 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 %onebit = shl i32 1, %numlowbits @@ -1870,54 +1870,54 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a1_trunc_extrause: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB16_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB16_2: -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: shll $8, %ebx -; X86-BMI1NOTBM-NEXT: bextrl %ebx, %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a1_trunc_extrause: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB16_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %esi -; X86-BMI1BMI2-NEXT: .LBB16_2: -; X86-BMI1BMI2-NEXT: movl %esi, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a1_trunc_extrause: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB16_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB16_2: +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: shll $8, %ebx +; X86-BMI1-NEXT: bextrl %ebx, %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a1_trunc_extrause: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB16_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi +; X86-BMI2-NEXT: .LBB16_2: +; X86-BMI2-NEXT: movl %esi, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a1_trunc_extrause: ; X64-NOBMI: # %bb.0: @@ -1941,39 +1941,39 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a1_trunc_extrause: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %edx, %ebp -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: shll $8, %ebp -; X64-BMI1NOTBM-NEXT: bextrl %ebp, %ebx, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a1_trunc_extrause: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebp -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rbx -; X64-BMI1BMI2-NEXT: movl %ebx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebp, %ebx, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a1_trunc_extrause: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %edx, %ebp +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: shll $8, %ebp +; X64-BMI1-NEXT: bextrl %ebp, %ebx, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a1_trunc_extrause: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebp +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rbx +; X64-BMI2-NEXT: movl %ebx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebp, %ebx, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 call void @use32(i32 %truncshifted) @@ -2011,44 +2011,44 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB17_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB17_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB17_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB17_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB17_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB17_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB17_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB17_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a2: ; X64-NOBMI: # %bb.0: @@ -2062,20 +2062,20 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -2119,61 +2119,61 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB18_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB18_2: -; X86-BMI1NOTBM-NEXT: movl $1, %edi -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: jne .LBB18_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: .LBB18_4: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB18_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB18_2: -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: jne .LBB18_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB18_4: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB18_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB18_2: +; X86-BMI1-NEXT: movl $1, %edi +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: jne .LBB18_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: .LBB18_4: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB18_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB18_2: +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB18_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: .LBB18_4: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a3: ; X64-NOBMI: # %bb.0: @@ -2188,20 +2188,20 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, 4294967295 @@ -2230,22 +2230,22 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b0: ; X64-NOBMI: # %bb.0: @@ -2259,19 +2259,19 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -2295,22 +2295,22 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2324,19 +2324,19 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %conv = zext i8 %numlowbits to i32 @@ -2363,24 +2363,24 @@ define i32 @bextr32_b2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b2_load: ; X64-NOBMI: # %bb.0: @@ -2395,19 +2395,19 @@ define i32 @bextr32_b2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %notmask = shl i32 -1, %numlowbits @@ -2433,24 +2433,24 @@ define i32 @bextr32_b3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -2465,19 +2465,19 @@ define i32 @bextr32_b3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe ; X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -2504,22 +2504,22 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -2533,19 +2533,19 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -2577,37 +2577,37 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl %al, %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %eax, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %esi -; X86-BMI1BMI2-NEXT: movl %ecx, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl %al, %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %eax, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: bzhil %eax, %edx, %esi +; X86-BMI2-NEXT: movl %ecx, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -2625,29 +2625,29 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %ebx -; X64-BMI1NOTBM-NEXT: movl %esi, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %ebx +; X64-BMI1-NEXT: movl %esi, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %ebx +; X64-BMI2-NEXT: movl %esi, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -2699,70 +2699,70 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB25_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB25_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB25_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB25_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB25_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB25_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB25_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB25_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB25_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB25_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB25_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB25_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB25_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB25_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB25_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB25_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b0: ; X64-NOBMI: # %bb.0: @@ -2776,19 +2776,19 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -2837,70 +2837,70 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB26_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB26_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB26_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB26_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB26_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB26_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB26_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB26_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB26_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB26_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB26_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB26_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB26_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB26_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB26_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB26_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2914,21 +2914,21 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %conv = zext i8 %numlowbits to i64 @@ -2980,72 +2980,72 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl (%edx), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB27_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB27_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB27_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB27_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB27_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB27_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB27_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB27_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl (%edx), %esi +; X86-BMI1-NEXT: movl 4(%edx), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB27_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB27_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB27_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB27_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %esi +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: shrdl %cl, %esi, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB27_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB27_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB27_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB27_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b2_load: ; X64-NOBMI: # %bb.0: @@ -3060,19 +3060,19 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits @@ -3123,72 +3123,72 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl (%edx), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB28_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB28_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB28_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB28_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB28_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB28_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB28_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB28_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl (%edx), %esi +; X86-BMI1-NEXT: movl 4(%edx), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB28_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB28_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB28_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB28_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %esi +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: shrdl %cl, %esi, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB28_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB28_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB28_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB28_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -3203,21 +3203,21 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -3269,70 +3269,70 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB29_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB29_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB29_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB29_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB29_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB29_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB29_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB29_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB29_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB29_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB29_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB29_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB29_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB29_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB29_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB29_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -3346,19 +3346,19 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -3419,96 +3419,96 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %ebx, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB30_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB30_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebp -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: je .LBB30_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp -; X86-BMI1NOTBM-NEXT: .LBB30_4: -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %esi -; X86-BMI1NOTBM-NEXT: andnl %edi, %ebp, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB30_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB30_2: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB30_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB30_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %esi -; X86-BMI1BMI2-NEXT: andnl %eax, %edi, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %ebx, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %ebx, %edi +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB30_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB30_2: +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shll %cl, %ebp +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: je .LBB30_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebp, %ebx +; X86-BMI1-NEXT: xorl %ebp, %ebp +; X86-BMI1-NEXT: .LBB30_4: +; X86-BMI1-NEXT: andnl %esi, %ebx, %esi +; X86-BMI1-NEXT: andnl %edi, %ebp, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB30_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB30_2: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %edi +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB30_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB30_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %esi +; X86-BMI2-NEXT: andnl %eax, %edi, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -3526,29 +3526,29 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rbx -; X64-BMI1BMI2-NEXT: movq %rsi, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rbx +; X64-BMI1-NEXT: movq %rsi, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rbx +; X64-BMI2-NEXT: movq %rsi, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -3592,59 +3592,59 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB31_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB31_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: jne .LBB31_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ecx -; X86-BMI1NOTBM-NEXT: .LBB31_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB31_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB31_2: -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB31_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB31_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB31_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB31_2: +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: jne .LBB31_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ecx +; X86-BMI1-NEXT: .LBB31_4: +; X86-BMI1-NEXT: andnl %edx, %ecx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB31_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB31_2: +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB31_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB31_4: +; X86-BMI2-NEXT: andnl %edx, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b0: ; X64-NOBMI: # %bb.0: @@ -3659,20 +3659,20 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 -1, %widenumlowbits @@ -3709,44 +3709,44 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB32_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB32_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB32_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB32_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB32_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB32_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB32_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB32_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b1: ; X64-NOBMI: # %bb.0: @@ -3760,20 +3760,20 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %truncshiftedval = trunc i64 %shiftedval to i32 %widenumlowbits = zext i8 %numlowbits to i32 @@ -3811,44 +3811,44 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB33_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB33_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB33_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB33_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB33_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB33_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB33_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB33_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b2: ; X64-NOBMI: # %bb.0: @@ -3862,20 +3862,20 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %widenumlowbits = zext i8 %numlowbits to i32 %notmask = shl nsw i32 -1, %widenumlowbits @@ -3920,59 +3920,59 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB34_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB34_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: jne .LBB34_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ecx -; X86-BMI1NOTBM-NEXT: .LBB34_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB34_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB34_2: -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB34_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB34_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB34_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB34_2: +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: jne .LBB34_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ecx +; X86-BMI1-NEXT: .LBB34_4: +; X86-BMI1-NEXT: andnl %edx, %ecx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB34_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB34_2: +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB34_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB34_4: +; X86-BMI2-NEXT: andnl %edx, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b3: ; X64-NOBMI: # %bb.0: @@ -3988,20 +3988,20 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 4294967295, %widenumlowbits @@ -4038,47 +4038,47 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c0: ; X64-NOBMI: # %bb.0: @@ -4102,45 +4102,45 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -4172,47 +4172,47 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -4236,45 +4236,45 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %numhighbits = sub i8 32, %numlowbits @@ -4309,49 +4309,49 @@ define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c2_load: ; X64-NOBMI: # %bb.0: @@ -4375,45 +4375,45 @@ define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebp, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl (%rdi), %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebp, %ebx +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -4447,49 +4447,49 @@ define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -4513,45 +4513,45 @@ define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebp, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl (%rdi), %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebp, %ebx +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -4586,47 +4586,47 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -4650,45 +4650,45 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -4725,57 +4725,57 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $16, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl %ebx, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %ebx, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $16, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %esi -; X86-BMI1BMI2-NEXT: movl %edi, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $16, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl %ebx, %ecx +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %ebx, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $16, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %esi +; X86-BMI2-NEXT: movl %edi, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -4801,51 +4801,51 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %r14d -; X64-BMI1NOTBM-NEXT: movl %edi, %ebp -; X64-BMI1NOTBM-NEXT: movl %r14d, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebp, %ebx -; X64-BMI1NOTBM-NEXT: movl %r14d, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %ebp -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %r14d -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %r14d, %ebx -; X64-BMI1BMI2-NEXT: movl %ebp, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %r14d +; X64-BMI1-NEXT: movl %edi, %ebp +; X64-BMI1-NEXT: movl %r14d, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebp, %ebx +; X64-BMI1-NEXT: movl %r14d, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: movl %esi, %ebp +; X64-BMI2-NEXT: shrxl %esi, %edi, %r14d +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %r14d, %ebx +; X64-BMI2-NEXT: movl %ebp, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -4904,95 +4904,95 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB41_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB41_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB41_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB41_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB41_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB41_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB41_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB41_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB41_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB41_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB41_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB41_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB41_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB41_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB41_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB41_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c0: ; X64-NOBMI: # %bb.0: @@ -5016,45 +5016,45 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -5110,95 +5110,95 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB42_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB42_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB42_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB42_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB42_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB42_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB42_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB42_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB42_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB42_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB42_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB42_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB42_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB42_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB42_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB42_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -5222,46 +5222,46 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %numhighbits = sub i8 64, %numlowbits @@ -5320,97 +5320,97 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB43_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB43_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB43_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB43_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB43_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB43_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB43_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB43_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB43_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB43_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB43_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB43_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB43_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB43_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB43_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB43_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c2_load: ; X64-NOBMI: # %bb.0: @@ -5434,45 +5434,45 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq (%rdi), %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq (%rdi), %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -5530,97 +5530,97 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB44_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB44_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB44_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB44_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB44_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB44_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB44_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB44_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB44_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB44_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB44_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB44_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB44_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB44_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB44_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB44_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -5644,46 +5644,46 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movq (%rdi), %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movq (%rdi), %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -5742,95 +5742,95 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB45_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB45_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB45_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB45_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB45_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB45_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB45_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB45_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB45_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB45_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB45_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB45_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB45_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB45_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB45_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB45_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -5854,45 +5854,45 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -5953,105 +5953,105 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB46_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB46_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB46_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp -; X86-BMI1NOTBM-NEXT: .LBB46_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: andl %ebp, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB46_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB46_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB46_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB46_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB46_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB46_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: shrl %cl, %ebp +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB46_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebp, %ebx +; X86-BMI1-NEXT: xorl %ebp, %ebp +; X86-BMI1-NEXT: .LBB46_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: andl %ebp, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB46_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB46_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB46_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB46_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -6077,51 +6077,51 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %r15 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r15 -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %r14 -; X64-BMI1NOTBM-NEXT: movq %rdi, %r15 -; X64-BMI1NOTBM-NEXT: movl %r14d, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r15 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r15, %rbx -; X64-BMI1NOTBM-NEXT: movq %r14, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: popq %r15 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r15 -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: movq %rsi, %r14 -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r15 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r15, %rbx -; X64-BMI1BMI2-NEXT: movq %r14, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: popq %r15 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r15 +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movq %rsi, %r14 +; X64-BMI1-NEXT: movq %rdi, %r15 +; X64-BMI1-NEXT: movl %r14d, %ecx +; X64-BMI1-NEXT: shrq %cl, %r15 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r15, %rbx +; X64-BMI1-NEXT: movq %r14, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: popq %r15 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r15 +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: movq %rsi, %r14 +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r15 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r15, %rbx +; X64-BMI2-NEXT: movq %r14, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: popq %r15 +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -6162,54 +6162,54 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB47_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB47_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB47_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: .LBB47_4: -; X86-BMI1NOTBM-NEXT: andl %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB47_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB47_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB47_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB47_4: -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB47_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB47_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB47_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: .LBB47_4: +; X86-BMI1-NEXT: andl %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB47_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB47_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB47_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB47_4: +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c0: ; X64-NOBMI: # %bb.0: @@ -6224,20 +6224,20 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -6270,44 +6270,44 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB48_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB48_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB48_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB48_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB48_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB48_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB48_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB48_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c1: ; X64-NOBMI: # %bb.0: @@ -6322,20 +6322,20 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_c1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 %numhighbits = sub i32 32, %numlowbits @@ -6369,44 +6369,44 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB49_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB49_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB49_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB49_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB49_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB49_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB49_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB49_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c2: ; X64-NOBMI: # %bb.0: @@ -6421,20 +6421,20 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_c2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -6475,59 +6475,59 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB50_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB50_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB50_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB50_4: -; X86-BMI1NOTBM-NEXT: andl %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB50_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB50_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB50_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: .LBB50_4: -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB50_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB50_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB50_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB50_4: +; X86-BMI1-NEXT: andl %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB50_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB50_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB50_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: .LBB50_4: +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c3: ; X64-NOBMI: # %bb.0: @@ -6542,28 +6542,28 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_c3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rdi -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rcx -; X64-BMI1BMI2-NEXT: negb %dl -; X64-BMI1BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1BMI2-NEXT: shrxq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: andl %ecx, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rdi +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: andl %edi, %eax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rcx +; X64-BMI2-NEXT: negb %dl +; X64-BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI2-NEXT: shrxq %rdx, %rax, %rax +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 4294967295, %numhighbits @@ -6589,22 +6589,22 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d0: ; X64-NOBMI: # %bb.0: @@ -6618,19 +6618,19 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %shifted, %numhighbits @@ -6651,22 +6651,22 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -6680,19 +6680,19 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %numhighbits = sub i8 32, %numlowbits @@ -6716,24 +6716,24 @@ define i32 @bextr32_d2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d2_load: ; X64-NOBMI: # %bb.0: @@ -6747,19 +6747,19 @@ define i32 @bextr32_d2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -6782,24 +6782,24 @@ define i32 @bextr32_d3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -6813,19 +6813,19 @@ define i32 @bextr32_d3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -6857,37 +6857,37 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl %al, %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %eax, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %esi -; X86-BMI1BMI2-NEXT: movl %ecx, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl %al, %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %eax, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: bzhil %eax, %edx, %esi +; X86-BMI2-NEXT: movl %ecx, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -6905,29 +6905,29 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %ebx -; X64-BMI1NOTBM-NEXT: movl %esi, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %ebx +; X64-BMI1-NEXT: movl %esi, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %ebx +; X64-BMI2-NEXT: movl %esi, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %shifted, %numhighbits @@ -6987,94 +6987,94 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB56_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB56_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB56_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB56_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB56_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB56_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB56_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB56_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB56_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB56_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB56_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB56_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB56_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB56_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB56_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB56_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB56_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB56_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB56_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %ebx +; X86-BMI1-NEXT: .LBB56_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB56_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB56_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB56_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB56_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB56_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB56_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB56_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB56_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB56_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB56_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB56_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB56_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d0: ; X64-NOBMI: # %bb.0: @@ -7088,19 +7088,19 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %shifted, %numhighbits @@ -7157,94 +7157,94 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB57_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB57_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB57_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB57_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB57_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB57_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB57_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB57_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB57_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB57_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB57_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB57_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB57_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB57_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB57_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB57_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB57_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB57_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB57_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %ebx +; X86-BMI1-NEXT: .LBB57_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB57_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB57_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB57_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB57_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB57_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB57_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB57_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB57_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB57_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB57_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB57_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB57_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -7258,21 +7258,21 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %numhighbits = sub i8 64, %numlowbits @@ -7332,96 +7332,96 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB58_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB58_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB58_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB58_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB58_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB58_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB58_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB58_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB58_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB58_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB58_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB58_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB58_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB58_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB58_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB58_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: movl 4(%eax), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB58_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB58_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB58_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %ebx +; X86-BMI1-NEXT: .LBB58_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB58_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB58_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB58_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB58_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %edx +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB58_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB58_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB58_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB58_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB58_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB58_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB58_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB58_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d2_load: ; X64-NOBMI: # %bb.0: @@ -7435,19 +7435,19 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -7506,96 +7506,96 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB59_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB59_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB59_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB59_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB59_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB59_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB59_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB59_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB59_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB59_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB59_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB59_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB59_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB59_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB59_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB59_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: movl 4(%eax), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB59_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB59_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB59_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %ebx +; X86-BMI1-NEXT: .LBB59_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB59_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB59_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB59_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB59_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %edx +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB59_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB59_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB59_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB59_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB59_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB59_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB59_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB59_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -7609,21 +7609,21 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -7696,118 +7696,118 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %ebx -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB60_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB60_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: jne .LBB60_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebp -; X86-BMI1NOTBM-NEXT: .LBB60_4: -; X86-BMI1NOTBM-NEXT: movl %ebp, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB60_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edx -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: .LBB60_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: jne .LBB60_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: .LBB60_8: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ecx -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edx, %edx -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB60_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB60_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edi, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB60_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edx -; X86-BMI1BMI2-NEXT: movl $0, %ebx -; X86-BMI1BMI2-NEXT: .LBB60_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: jne .LBB60_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: .LBB60_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: jne .LBB60_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: .LBB60_8: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edx, %ebx +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB60_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB60_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %ebx, %esi +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: jne .LBB60_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebp +; X86-BMI1-NEXT: .LBB60_4: +; X86-BMI1-NEXT: movl %ebp, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edi +; X86-BMI1-NEXT: jne .LBB60_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %edx +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: .LBB60_6: +; X86-BMI1-NEXT: shrdl %cl, %ebp, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: jne .LBB60_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: .LBB60_8: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ecx +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %edi +; X86-BMI2-NEXT: shrxl %eax, %edx, %edx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB60_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB60_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %edi, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edi, %ebx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB60_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %edx +; X86-BMI2-NEXT: movl $0, %ebx +; X86-BMI2-NEXT: .LBB60_4: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edi +; X86-BMI2-NEXT: jne .LBB60_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: .LBB60_6: +; X86-BMI2-NEXT: shrdl %cl, %edx, %ebx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: jne .LBB60_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %ebx, %edi +; X86-BMI2-NEXT: .LBB60_8: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -7825,29 +7825,29 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rbx -; X64-BMI1BMI2-NEXT: movq %rsi, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rbx +; X64-BMI1-NEXT: movq %rsi, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rbx +; X64-BMI2-NEXT: movq %rsi, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %shifted, %numhighbits @@ -7895,71 +7895,71 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB61_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB61_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB61_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB61_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB61_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB61_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB61_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB61_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB61_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB61_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB61_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax -; X86-BMI1BMI2-NEXT: .LBB61_6: -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %esi, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB61_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB61_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB61_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB61_4: +; X86-BMI1-NEXT: shrdl %cl, %eax, %edx +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB61_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB61_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB61_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB61_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB61_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB61_4: +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB61_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB61_6: +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_d0: ; X64-NOBMI: # %bb.0: @@ -7974,21 +7974,21 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %shifted, %numhighbits @@ -8021,44 +8021,44 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_d1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB62_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB62_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_d1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB62_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB62_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_d1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB62_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB62_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_d1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB62_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB62_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_d1: ; X64-NOBMI: # %bb.0: @@ -8073,20 +8073,20 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_d1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_d1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_d1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_d1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 %numhighbits = sub i32 32, %numlowbits @@ -8111,31 +8111,22 @@ define void @pr38938(i32* %a0, i64* %a1) nounwind { ; X86-NOBMI-NEXT: incl (%eax,%ecx) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: pr38938: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $2581, %edx # imm = 0xA15 -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%ecx), %ecx -; X86-BMI1NOTBM-NEXT: incl (%eax,%ecx,4) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: pr38938: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1TBM-NEXT: bextrl $2581, (%ecx), %ecx # imm = 0xA15 -; X86-BMI1TBM-NEXT: incl (%eax,%ecx,4) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: pr38938: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %edx # imm = 0xA15 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %edx, (%ecx), %ecx -; X86-BMI1NOTBMBMI2-NEXT: incl (%eax,%ecx,4) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: pr38938: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl $2581, %edx # imm = 0xA15 +; X86-BMINOTBM-NEXT: bextrl %edx, (%ecx), %ecx +; X86-BMINOTBM-NEXT: incl (%eax,%ecx,4) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: pr38938: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMITBM-NEXT: bextrl $2581, (%ecx), %ecx # imm = 0xA15 +; X86-BMITBM-NEXT: incl (%eax,%ecx,4) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: pr38938: ; X64-NOBMI: # %bb.0: @@ -8145,25 +8136,18 @@ define void @pr38938(i32* %a0, i64* %a1) nounwind { ; X64-NOBMI-NEXT: incl (%rdi,%rax) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: pr38938: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15 -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rsi), %rax -; X64-BMI1NOTBM-NEXT: incl (%rdi,%rax,4) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: pr38938: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $2581, (%rsi), %rax # imm = 0xA15 -; X64-BMI1TBM-NEXT: incl (%rdi,%rax,4) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: pr38938: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, (%rsi), %rax -; X64-BMI1NOTBMBMI2-NEXT: incl (%rdi,%rax,4) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: pr38938: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2581, %eax # imm = 0xA15 +; X64-BMINOTBM-NEXT: bextrq %rax, (%rsi), %rax +; X64-BMINOTBM-NEXT: incl (%rdi,%rax,4) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: pr38938: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $2581, (%rsi), %rax # imm = 0xA15 +; X64-BMITBM-NEXT: incl (%rdi,%rax,4) +; X64-BMITBM-NEXT: retq %tmp = load i64, i64* %a1, align 8 %tmp1 = lshr i64 %tmp, 21 %tmp2 = and i64 %tmp1, 1023 @@ -8183,22 +8167,16 @@ define i32 @c0_i32(i32 %arg) nounwind { ; X86-NOBMI-NEXT: andl $1023, %eax # imm = 0x3FF ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c0_i32: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMINOTBM-LABEL: c0_i32: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: retl ; -; X86-BMI1TBM-LABEL: c0_i32: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c0_i32: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMITBM-LABEL: c0_i32: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c0_i32: ; X64-NOBMI: # %bb.0: @@ -8207,22 +8185,16 @@ define i32 @c0_i32(i32 %arg) nounwind { ; X64-NOBMI-NEXT: andl $1023, %eax # imm = 0x3FF ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c0_i32: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c0_i32: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c0_i32: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c0_i32: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMINOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c0_i32: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 +; X64-BMITBM-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 ret i32 %tmp1 @@ -8301,25 +8273,18 @@ define i64 @c0_i64(i64 %arg) nounwind { ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c0_i64: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c0_i64: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 -; X86-BMI1TBM-NEXT: xorl %edx, %edx -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c0_i64: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c0_i64: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: xorl %edx, %edx +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c0_i64: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 +; X86-BMITBM-NEXT: xorl %edx, %edx +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c0_i64: ; X64-NOBMI: # %bb.0: @@ -8328,22 +8293,16 @@ define i64 @c0_i64(i64 %arg) nounwind { ; X64-NOBMI-NEXT: andl $1023, %eax # imm = 0x3FF ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c0_i64: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c0_i64: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c0_i64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c0_i64: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMINOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c0_i64: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 +; X64-BMITBM-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 ret i64 %tmp1 @@ -8430,28 +8389,20 @@ define void @c5_i32(i32 %arg, i32* %ptr) nounwind { ; X86-NOBMI-NEXT: movl %ecx, (%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c5_i32: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c5_i32: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c5_i32: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c5_i32: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c5_i32: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c5_i32: ; X64-NOBMI: # %bb.0: @@ -8460,25 +8411,18 @@ define void @c5_i32(i32 %arg, i32* %ptr) nounwind { ; X64-NOBMI-NEXT: movl %edi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c5_i32: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c5_i32: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 -; X64-BMI1TBM-NEXT: movl %eax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c5_i32: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBMBMI2-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c5_i32: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMINOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMINOTBM-NEXT: movl %eax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c5_i32: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 +; X64-BMITBM-NEXT: movl %eax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 store i32 %tmp1, i32* %ptr @@ -8496,28 +8440,20 @@ define void @c6_i32(i32 %arg, i32* %ptr) nounwind { ; X86-NOBMI-NEXT: movl %ecx, (%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c6_i32: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c6_i32: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c6_i32: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c6_i32: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl $3091, %ecx # imm = 0xC13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c6_i32: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c6_i32: ; X64-NOBMI: # %bb.0: @@ -8526,25 +8462,18 @@ define void @c6_i32(i32 %arg, i32* %ptr) nounwind { ; X64-NOBMI-NEXT: movl %edi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c6_i32: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $3091, %eax # imm = 0xC13 -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c6_i32: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrl $3091, %edi, %eax # imm = 0xC13 -; X64-BMI1TBM-NEXT: movl %eax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c6_i32: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $3091, %eax # imm = 0xC13 -; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBMBMI2-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c6_i32: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $3091, %eax # imm = 0xC13 +; X64-BMINOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMINOTBM-NEXT: movl %eax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c6_i32: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrl $3091, %edi, %eax # imm = 0xC13 +; X64-BMITBM-NEXT: movl %eax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 4095 store i32 %tmp1, i32* %ptr @@ -8589,31 +8518,22 @@ define void @c5_i64(i64 %arg, i64* %ptr) nounwind { ; X86-NOBMI-NEXT: movl $0, 4(%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c5_i64: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c5_i64: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: movl $0, 4(%eax) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c5_i64: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c5_i64: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: movl $0, 4(%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c5_i64: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: movl $0, 4(%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c5_i64: ; X64-NOBMI: # %bb.0: @@ -8622,25 +8542,18 @@ define void @c5_i64(i64 %arg, i64* %ptr) nounwind { ; X64-NOBMI-NEXT: movq %rdi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c5_i64: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c5_i64: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 -; X64-BMI1TBM-NEXT: movq %rax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c5_i64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c5_i64: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMINOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMINOTBM-NEXT: movq %rax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c5_i64: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 +; X64-BMITBM-NEXT: movq %rax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 store i64 %tmp1, i64* %ptr @@ -8659,31 +8572,22 @@ define void @c6_i64(i64 %arg, i64* %ptr) nounwind { ; X86-NOBMI-NEXT: movl $0, 4(%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c6_i64: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c6_i64: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: movl $0, 4(%eax) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c6_i64: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c6_i64: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl $3091, %ecx # imm = 0xC13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: movl $0, 4(%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c6_i64: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: movl $0, 4(%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c6_i64: ; X64-NOBMI: # %bb.0: @@ -8692,25 +8596,18 @@ define void @c6_i64(i64 %arg, i64* %ptr) nounwind { ; X64-NOBMI-NEXT: movq %rdi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c6_i64: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $3123, %eax # imm = 0xC33 -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c6_i64: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $3123, %rdi, %rax # imm = 0xC33 -; X64-BMI1TBM-NEXT: movq %rax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c6_i64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $3123, %eax # imm = 0xC33 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c6_i64: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $3123, %eax # imm = 0xC33 +; X64-BMINOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMINOTBM-NEXT: movq %rax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c6_i64: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $3123, %rdi, %rax # imm = 0xC33 +; X64-BMITBM-NEXT: movq %rax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 4095 store i64 %tmp1, i64* %ptr diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll index 4c11d55..177f99e 100644 --- a/llvm/test/CodeGen/X86/extract-lowbits.ll +++ b/llvm/test/CodeGen/X86/extract-lowbits.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM ; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll @@ -37,18 +37,18 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a0: ; X64-NOBMI: # %bb.0: @@ -60,16 +60,16 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %mask, %val @@ -86,18 +86,18 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -109,16 +109,16 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i32 %onebit = shl i32 1, %conv %mask = add nsw i32 %onebit, -1 @@ -137,20 +137,20 @@ define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a2_load: ; X64-NOBMI: # %bb.0: @@ -162,16 +162,16 @@ define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -190,20 +190,20 @@ define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -215,16 +215,16 @@ define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %conv = zext i8 %numlowbits to i32 %onebit = shl i32 1, %conv @@ -243,18 +243,18 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a4_commutative: ; X64-NOBMI: # %bb.0: @@ -266,16 +266,16 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %val, %mask ; swapped order @@ -304,43 +304,43 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB5_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB5_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB5_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB5_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB5_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB5_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB5_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB5_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a0: ; X64-NOBMI: # %bb.0: @@ -352,16 +352,16 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 %masked = and i64 %mask, %val @@ -388,43 +388,43 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB6_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB6_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB6_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB6_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB6_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB6_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB6_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB6_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -436,18 +436,18 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i64 %onebit = shl i64 1, %conv %mask = add nsw i64 %onebit, -1 @@ -478,49 +478,49 @@ define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB7_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB7_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx -; X86-BMI1NOTBM-NEXT: andl (%esi), %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB7_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB7_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx -; X86-BMI1BMI2-NEXT: andl (%esi), %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB7_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB7_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl 4(%esi), %edx +; X86-BMI1-NEXT: andl (%esi), %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB7_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB7_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl 4(%esi), %edx +; X86-BMI2-NEXT: andl (%esi), %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a2_load: ; X64-NOBMI: # %bb.0: @@ -532,16 +532,16 @@ define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_a2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -572,49 +572,49 @@ define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB8_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB8_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx -; X86-BMI1NOTBM-NEXT: andl (%esi), %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB8_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB8_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx -; X86-BMI1BMI2-NEXT: andl (%esi), %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB8_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB8_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl 4(%esi), %edx +; X86-BMI1-NEXT: andl (%esi), %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB8_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB8_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl 4(%esi), %edx +; X86-BMI2-NEXT: andl (%esi), %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -626,18 +626,18 @@ define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %conv = zext i8 %numlowbits to i64 %onebit = shl i64 1, %conv @@ -666,43 +666,43 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB9_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB9_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB9_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB9_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB9_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB9_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB9_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB9_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a4_commutative: ; X64-NOBMI: # %bb.0: @@ -714,16 +714,16 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 %masked = and i64 %val, %mask ; swapped order @@ -749,34 +749,34 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB10_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB10_2: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB10_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB10_2: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %edx +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB10_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB10_2: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB10_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB10_2: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a0: ; X64-NOBMI: # %bb.0: @@ -789,16 +789,16 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 %masked = and i64 %mask, %val @@ -817,18 +817,18 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_a1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_a1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a1: ; X64-NOBMI: # %bb.0: @@ -840,16 +840,16 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -879,36 +879,36 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: shll $8, %ebx -; X86-BMI1NOTBM-NEXT: bextrl %ebx, %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %esi, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: shll $8, %ebx +; X86-BMI1-NEXT: bextrl %ebx, %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %esi, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause: ; X64-NOBMI: # %bb.0: @@ -928,34 +928,34 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ebx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: shll $8, %ebx -; X64-BMI1NOTBM-NEXT: bextrl %ebx, %r14d, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebp -; X64-BMI1BMI2-NEXT: movq %rdi, %rbx -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebp, %ebx, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ebx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: shll $8, %ebx +; X64-BMI1-NEXT: bextrl %ebx, %r14d, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebp +; X64-BMI2-NEXT: movq %rdi, %rbx +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebp, %ebx, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 call void @use32(i32 %truncval) %onebit = shl i32 1, %numlowbits @@ -976,18 +976,18 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_a2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_a2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a2: ; X64-NOBMI: # %bb.0: @@ -999,16 +999,16 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %zextmask = zext i32 %mask to i64 @@ -1035,34 +1035,34 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB14_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB14_2: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_a3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB14_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB14_2: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %edx +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB14_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB14_2: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_a3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB14_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB14_2: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a3: ; X64-NOBMI: # %bb.0: @@ -1075,16 +1075,16 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, 4294967295 %masked = and i64 %mask, %val @@ -1106,18 +1106,18 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b0: ; X64-NOBMI: # %bb.0: @@ -1129,16 +1129,16 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 %masked = and i32 %mask, %val @@ -1155,18 +1155,18 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -1178,16 +1178,16 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i32 %notmask = shl i32 -1, %conv %mask = xor i32 %notmask, -1 @@ -1206,20 +1206,20 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_b2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b2_load: ; X64-NOBMI: # %bb.0: @@ -1231,16 +1231,16 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -1259,20 +1259,20 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -1284,16 +1284,16 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %conv = zext i8 %numlowbits to i32 %notmask = shl i32 -1, %conv @@ -1312,18 +1312,18 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -1335,16 +1335,16 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 %masked = and i32 %val, %mask ; swapped order @@ -1377,36 +1377,36 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB20_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB20_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB20_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB20_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB20_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB20_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB20_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB20_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b0: ; X64-NOBMI: # %bb.0: @@ -1418,16 +1418,16 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 %masked = and i64 %mask, %val @@ -1458,36 +1458,36 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB21_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB21_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB21_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB21_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB21_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB21_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB21_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB21_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -1499,18 +1499,18 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i64 %notmask = shl i64 -1, %conv %mask = xor i64 %notmask, -1 @@ -1545,42 +1545,42 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB22_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB22_2: -; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB22_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB22_2: -; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB22_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB22_2: +; X86-BMI1-NEXT: andnl (%edx), %eax, %eax +; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB22_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB22_2: +; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax +; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b2_load: ; X64-NOBMI: # %bb.0: @@ -1592,16 +1592,16 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -1636,42 +1636,42 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB23_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB23_2: -; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB23_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB23_2: -; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB23_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB23_2: +; X86-BMI1-NEXT: andnl (%edx), %eax, %eax +; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB23_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB23_2: +; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax +; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -1683,18 +1683,18 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %conv = zext i8 %numlowbits to i64 %notmask = shl i64 -1, %conv @@ -1727,36 +1727,36 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB24_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB24_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB24_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB24_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB24_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB24_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB24_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB24_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -1768,16 +1768,16 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 %masked = and i64 %val, %mask ; swapped order @@ -1803,32 +1803,32 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB25_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB25_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB25_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB25_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB25_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB25_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB25_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB25_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b0: ; X64-NOBMI: # %bb.0: @@ -1841,16 +1841,16 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 -1, %widenumlowbits %mask = xor i64 %notmask, -1 @@ -1870,18 +1870,18 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_b1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_b1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b1: ; X64-NOBMI: # %bb.0: @@ -1893,16 +1893,16 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %widenumlowbits = zext i8 %numlowbits to i32 %notmask = shl nsw i32 -1, %widenumlowbits @@ -1923,18 +1923,18 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_b2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_b2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b2: ; X64-NOBMI: # %bb.0: @@ -1946,16 +1946,16 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %widenumlowbits = zext i8 %numlowbits to i32 %notmask = shl nsw i32 -1, %widenumlowbits %mask = xor i32 %notmask, -1 @@ -1983,32 +1983,32 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB28_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB28_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_b3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB28_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB28_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB28_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB28_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_b3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB28_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB28_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b3: ; X64-NOBMI: # %bb.0: @@ -2022,16 +2022,16 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 4294967295, %widenumlowbits %mask = xor i64 %notmask, 4294967295 @@ -2064,38 +2064,38 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c0: ; X64-NOBMI: # %bb.0: @@ -2117,43 +2117,43 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movl %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movl %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits call void @use32(i32 %mask) @@ -2179,38 +2179,38 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2232,43 +2232,43 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movl %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movl %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 %mask = lshr i32 -1, %sh_prom @@ -2297,42 +2297,42 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: andl %edx, %esi -; X86-BMI1NOTBM-NEXT: movl %edx, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X86-BMI1BMI2-NEXT: negb %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: andl %edx, %esi +; X86-BMI1-NEXT: movl %edx, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %esi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: negb %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c2_load: ; X64-NOBMI: # %bb.0: @@ -2350,34 +2350,34 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %eax -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebx -; X64-BMI1NOTBM-NEXT: andl %eax, %ebx -; X64-BMI1NOTBM-NEXT: movl %eax, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %ebx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movl $-1, %eax -; X64-BMI1BMI2-NEXT: shrxl %esi, %eax, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %eax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: movl (%rdi), %ebx +; X64-BMI1-NEXT: andl %eax, %ebx +; X64-BMI1-NEXT: movl %eax, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %ebx +; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -2406,42 +2406,42 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: andl %edx, %esi -; X86-BMI1NOTBM-NEXT: movl %edx, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X86-BMI1BMI2-NEXT: negb %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: andl %edx, %esi +; X86-BMI1-NEXT: movl %edx, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %esi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: negb %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -2459,34 +2459,34 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %eax -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebx -; X64-BMI1NOTBM-NEXT: andl %eax, %ebx -; X64-BMI1NOTBM-NEXT: movl %eax, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %ebx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movl $-1, %eax -; X64-BMI1BMI2-NEXT: shrxl %esi, %eax, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %eax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: movl (%rdi), %ebx +; X64-BMI1-NEXT: andl %eax, %ebx +; X64-BMI1-NEXT: movl %eax, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %ebx +; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -2514,38 +2514,38 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -2567,43 +2567,43 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movl %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movl %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits call void @use32(i32 %mask) @@ -2646,64 +2646,64 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB34_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB34_2: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB34_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB34_2: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB34_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB34_2: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB34_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB34_2: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c0: ; X64-NOBMI: # %bb.0: @@ -2725,43 +2725,43 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rsi, %rbx -; X64-BMI1BMI2-NEXT: movq %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rsi, %rbx +; X64-BMI2-NEXT: movq %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits call void @use64(i64 %mask) @@ -2800,64 +2800,64 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB35_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB35_2: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB35_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB35_2: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB35_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB35_2: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB35_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB35_2: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2879,43 +2879,43 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movq %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movq %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 %mask = lshr i64 -1, %sh_prom @@ -2958,70 +2958,70 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB36_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB36_2: -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: movl (%edx), %edi -; X86-BMI1NOTBM-NEXT: andl %eax, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb $64, %bl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB36_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB36_2: -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: andl %edx, %esi -; X86-BMI1BMI2-NEXT: movl (%eax), %edi -; X86-BMI1BMI2-NEXT: andl %ecx, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %edx -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB36_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB36_2: +; X86-BMI1-NEXT: movl 4(%edx), %esi +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: movl (%edx), %edi +; X86-BMI1-NEXT: andl %eax, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb $64, %bl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %ebx, %ecx, %edx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB36_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB36_2: +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: andl %edx, %esi +; X86-BMI2-NEXT: movl (%eax), %edi +; X86-BMI2-NEXT: andl %ecx, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %edx +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c2_load: ; X64-NOBMI: # %bb.0: @@ -3039,34 +3039,34 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rax -; X64-BMI1NOTBM-NEXT: movq (%rdi), %rbx -; X64-BMI1NOTBM-NEXT: andq %rax, %rbx -; X64-BMI1NOTBM-NEXT: movq %rax, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movq $-1, %rax -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: movq (%rdi), %rbx +; X64-BMI1-NEXT: andq %rax, %rbx +; X64-BMI1-NEXT: movq %rax, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx +; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -3109,70 +3109,70 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB37_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB37_2: -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: movl (%edx), %edi -; X86-BMI1NOTBM-NEXT: andl %eax, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb $64, %bl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB37_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB37_2: -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: andl %edx, %esi -; X86-BMI1BMI2-NEXT: movl (%eax), %edi -; X86-BMI1BMI2-NEXT: andl %ecx, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %edx -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB37_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB37_2: +; X86-BMI1-NEXT: movl 4(%edx), %esi +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: movl (%edx), %edi +; X86-BMI1-NEXT: andl %eax, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb $64, %bl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %ebx, %ecx, %edx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB37_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB37_2: +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: andl %edx, %esi +; X86-BMI2-NEXT: movl (%eax), %edi +; X86-BMI2-NEXT: andl %ecx, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %edx +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -3190,35 +3190,35 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rax -; X64-BMI1NOTBM-NEXT: movq (%rdi), %rbx -; X64-BMI1NOTBM-NEXT: andq %rax, %rbx -; X64-BMI1NOTBM-NEXT: movq %rax, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movq $-1, %rax -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: movq (%rdi), %rbx +; X64-BMI1-NEXT: andq %rax, %rbx +; X64-BMI1-NEXT: movq %rax, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx +; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -3259,64 +3259,64 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB38_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB38_2: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB38_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB38_2: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB38_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB38_2: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB38_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB38_2: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -3338,43 +3338,43 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rsi, %rbx -; X64-BMI1BMI2-NEXT: movq %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rsi, %rbx +; X64-BMI2-NEXT: movq %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits call void @use64(i64 %mask) @@ -3400,32 +3400,32 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB39_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: .LBB39_2: -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB39_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB39_2: -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB39_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: .LBB39_2: +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB39_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB39_2: +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_c0: ; X64-NOBMI: # %bb.0: @@ -3438,16 +3438,16 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits %masked = and i64 %mask, %val @@ -3467,18 +3467,18 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_c1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_c1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_c1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_c1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_c1: ; X64-NOBMI: # %bb.0: @@ -3491,16 +3491,16 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_c1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_c1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -3521,18 +3521,18 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_c2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_c2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_c2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_c2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_c2: ; X64-NOBMI: # %bb.0: @@ -3545,16 +3545,16 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_c2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_c2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits %zextmask = zext i32 %mask to i64 @@ -3592,25 +3592,25 @@ define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_32_c3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rax -; X64-BMI1BMI2-NEXT: andl %edi, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: andl %edi, %eax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_32_c3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl %edi, %eax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 4294967295, %numhighbits %masked = and i64 %mask, %val @@ -3633,18 +3633,18 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d0: ; X64-NOBMI: # %bb.0: @@ -3656,16 +3656,16 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %val, %numhighbits %masked = lshr i32 %highbitscleared, %numhighbits @@ -3683,18 +3683,18 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -3706,16 +3706,16 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 %highbitscleared = shl i32 %val, %sh_prom @@ -3735,20 +3735,20 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d2_load: ; X64-NOBMI: # %bb.0: @@ -3760,16 +3760,16 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %val, %numhighbits @@ -3789,20 +3789,20 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -3814,16 +3814,16 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -3874,76 +3874,76 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB47_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB47_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB47_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB47_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB47_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB47_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB47_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB47_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB47_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB47_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB47_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB47_6: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB47_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB47_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB47_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB47_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB47_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB47_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB47_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB47_2: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB47_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB47_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB47_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB47_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_d0: ; X64-NOBMI: # %bb.0: @@ -3955,16 +3955,16 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %val, %numhighbits %masked = lshr i64 %highbitscleared, %numhighbits @@ -4011,76 +4011,76 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB48_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB48_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB48_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB48_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB48_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB48_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB48_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB48_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB48_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB48_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB48_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB48_6: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB48_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB48_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB48_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB48_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB48_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB48_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB48_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB48_2: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB48_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB48_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB48_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB48_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -4092,18 +4092,18 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 %highbitscleared = shl i64 %val, %sh_prom @@ -4152,78 +4152,78 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edx -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB49_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB49_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB49_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB49_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB49_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB49_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %edx -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB49_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB49_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB49_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB49_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB49_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB49_6: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edx +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB49_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB49_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB49_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB49_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB49_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB49_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %edx +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %edx, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB49_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB49_2: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB49_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB49_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB49_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB49_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_d2_load: ; X64-NOBMI: # %bb.0: @@ -4235,16 +4235,16 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %val, %numhighbits @@ -4293,78 +4293,78 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edx -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB50_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB50_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB50_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB50_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB50_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB50_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %edx -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB50_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB50_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB50_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB50_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB50_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB50_6: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edx +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB50_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB50_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB50_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB50_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB50_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB50_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %edx +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %edx, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB50_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB50_2: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB50_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB50_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB50_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB50_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -4376,18 +4376,18 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -4426,53 +4426,53 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB51_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB51_2: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB51_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB51_4: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB51_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB51_2: -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB51_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax -; X86-BMI1BMI2-NEXT: .LBB51_4: -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: shldl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB51_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB51_2: +; X86-BMI1-NEXT: shrdl %cl, %eax, %edx +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB51_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB51_4: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB51_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB51_2: +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB51_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB51_4: +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_d0: ; X64-NOBMI: # %bb.0: @@ -4485,18 +4485,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %val, %numhighbits %masked = lshr i64 %highbitscleared, %numhighbits @@ -4516,18 +4516,18 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_d1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_d1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_d1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_d1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_d1: ; X64-NOBMI: # %bb.0: @@ -4540,16 +4540,16 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_d1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_d1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_d1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_d1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %truncval, %numhighbits @@ -4692,11 +4692,16 @@ define i64 @bzhi64_constant_mask64(i64 %val) nounwind { ; X64-BMI1TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00 ; X64-BMI1TBM-NEXT: retq ; -; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al -; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMI2TBM-LABEL: bzhi64_constant_mask64: +; X64-BMI2TBM: # %bb.0: +; X64-BMI2TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00 +; X64-BMI2TBM-NEXT: retq +; +; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64: +; X64-BMI2NOTBM: # %bb.0: +; X64-BMI2NOTBM-NEXT: movb $62, %al +; X64-BMI2NOTBM-NEXT: bzhiq %rax, %rdi, %rax +; X64-BMI2NOTBM-NEXT: retq %masked = and i64 %val, 4611686018427387903 ret i64 %masked } @@ -4727,11 +4732,16 @@ define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind { ; X64-BMI1TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00 ; X64-BMI1TBM-NEXT: retq ; -; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64_load: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al -; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, (%rdi), %rax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load: +; X64-BMI2TBM: # %bb.0: +; X64-BMI2TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00 +; X64-BMI2TBM-NEXT: retq +; +; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load: +; X64-BMI2NOTBM: # %bb.0: +; X64-BMI2NOTBM-NEXT: movb $62, %al +; X64-BMI2NOTBM-NEXT: bzhiq %rax, (%rdi), %rax +; X64-BMI2NOTBM-NEXT: retq %val1 = load i64, i64* %val %masked = and i64 %val1, 4611686018427387903 ret i64 %masked -- 2.7.4