From 1709829fede301a390163480ef51f0e140b0d028 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 30 Sep 2018 03:01:46 +0000 Subject: [PATCH] [X86] Disable BMI BEXTR in X86DAGToDAGISel::matchBEXTRFromAnd unless we're compiling for a CPU with single uop BEXTR Summary: This function turns (X >> C1) & C2 into a BMI BEXTR or TBM BEXTRI instruction. For BMI BEXTR we have to materialize an immediate into a register to feed to the BEXTR instruction. The BMI BEXTR instruction is 2 uops on Intel CPUs. It looks like on SKL it's one port 0/6 uop and one port 1/5 uop, despite what Agner's tables say. I know one of the uops is a regular shift uop so it would have to go through the port 0/6 shifter unit. So that's the same or worse execution wise than the shift+and which is one 0/6 uop and one 0/1/5/6 uop. The move immediate into register is an additional 0/1/5/6 uop. For now I've limited this transform to AMD CPUs which have a single uop BEXTR. It may also make sense if we can fold a load, or if the and immediate is larger than 32-bits and can't be encoded as a sign extended 32-bit value, or if LICM or CSE can hoist the move immediate and share it. But we'd need to look more carefully at that. In the regression I looked at it doesn't look like load folding or large immediates were occurring, so the regression isn't caused by the loss of those. So we could try to be smarter here if we find a compelling case. 
Reviewers: RKSimon, spatel, lebedev.ri, andreadb Reviewed By: RKSimon Subscribers: llvm-commits, andreadb, RKSimon Differential Revision: https://reviews.llvm.org/D52570 llvm-svn: 343399 --- llvm/lib/Target/X86/X86.td | 9 ++++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 9 +++- llvm/lib/Target/X86/X86Subtarget.h | 4 ++ llvm/test/CodeGen/X86/bmi-x86_64.ll | 64 ++++++++++++++++++------- llvm/test/CodeGen/X86/bmi.ll | 85 ++++++++++++++++++++++----------- llvm/test/CodeGen/X86/extract-bits.ll | 20 ++++---- 6 files changed, 136 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index e5d5d92..2c48b54 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -400,6 +400,10 @@ def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true", "Support movdir64b instruction">; +def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true", + "Indicates that the BEXTR instruction is implemented as a single uop " + "with good throughput.">; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -987,6 +991,7 @@ def : ProcessorModel<"btver2", BtVer2Model, [ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast15ByteNOP, + FeatureFastBEXTR, FeatureFastPartialYMMorZMMWrite ]>; @@ -1042,6 +1047,7 @@ def : Proc<"bdver2", [ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast11ByteNOP, + FeatureFastBEXTR, FeatureMacroFusion ]>; @@ -1074,6 +1080,7 @@ def : Proc<"bdver3", [ FeatureFSGSBase, FeatureLAHFSAHF, FeatureFast11ByteNOP, + FeatureFastBEXTR, FeatureMacroFusion ]>; @@ -1105,6 +1112,7 @@ def : Proc<"bdver4", [ FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, + FeatureFastBEXTR, FeatureFast11ByteNOP, FeatureMWAITX, FeatureMacroFusion @@ -1130,6 +1138,7 @@ def: ProcessorModel<"znver1", 
Znver1Model, [ FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, + FeatureFastBEXTR, FeatureFast15ByteNOP, FeatureMacroFusion, FeatureMMX, diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 32ad262..a0ef4b6 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2590,7 +2590,14 @@ bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - if (!Subtarget->hasBMI() && !Subtarget->hasTBM()) + // If we have TBM we can use an immediate for the control. If we have BMI + // we should only do this if the BEXTR instruction is implemented well. + // Otherwise moving the control into a register makes this more costly. + // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM + // hoisting the move immediate would make it worthwhile with a less optimal + // BEXTR? + if (!Subtarget->hasTBM() && + !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR())) return false; // Must have a shift right. diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 82ff942..5dd406b 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -385,6 +385,9 @@ protected: /// Processor supports PCONFIG instruction bool HasPCONFIG = false; + /// Processor has a single uop BEXTR implementation. + bool HasFastBEXTR = false; + /// Use a retpoline thunk rather than indirect calls to block speculative /// execution. 
bool UseRetpolineIndirectCalls = false; @@ -629,6 +632,7 @@ public: bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasFastBEXTR() const { return HasFastBEXTR; } bool hasMacroFusion() const { return HasMacroFusion; } bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll index 970dafd..81ac313 100644 --- a/llvm/test/CodeGen/X86/bmi-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI1,BMI1-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2,BMI2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI1,BMI1-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI2,BMI2-FAST declare i64 @llvm.x86.bmi.bextr.64(i64, i64) @@ -14,11 +16,18 @@ define i64 @bextr64(i64 %x, i64 %y) { } define i64 @bextr64b(i64 %x) uwtable ssp { -; CHECK-LABEL: bextr64b: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, %edi, %eax -; CHECK-NEXT: retq +; BEXTR-SLOW-LABEL: bextr64b: +; BEXTR-SLOW: # %bb.0: +; BEXTR-SLOW-NEXT: movq %rdi, %rax +; BEXTR-SLOW-NEXT: shrl $4, %eax +; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF +; BEXTR-SLOW-NEXT: retq +; 
+; BEXTR-FAST-LABEL: bextr64b: +; BEXTR-FAST: # %bb.0: +; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 +; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax +; BEXTR-FAST-NEXT: retq %1 = lshr i64 %x, 4 %2 = and i64 %1, 4095 ret i64 %2 @@ -37,11 +46,18 @@ define i64 @bextr64_subreg(i64 %x) uwtable ssp { } define i64 @bextr64b_load(i64* %x) { -; CHECK-LABEL: bextr64b_load: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, (%rdi), %eax -; CHECK-NEXT: retq +; BEXTR-SLOW-LABEL: bextr64b_load: +; BEXTR-SLOW: # %bb.0: +; BEXTR-SLOW-NEXT: movl (%rdi), %eax +; BEXTR-SLOW-NEXT: shrl $4, %eax +; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF +; BEXTR-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: bextr64b_load: +; BEXTR-FAST: # %bb.0: +; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 +; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax +; BEXTR-FAST-NEXT: retq %1 = load i64, i64* %x, align 8 %2 = lshr i64 %1, 4 %3 = and i64 %2, 4095 @@ -61,11 +77,25 @@ define i64 @bextr64c(i64 %x, i32 %y) { } define i64 @bextr64d(i64 %a) { -; CHECK-LABEL: bextr64d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $8450, %eax # imm = 0x2102 -; CHECK-NEXT: bextrq %rax, %rdi, %rax -; CHECK-NEXT: retq +; BMI1-SLOW-LABEL: bextr64d: +; BMI1-SLOW: # %bb.0: # %entry +; BMI1-SLOW-NEXT: shrq $2, %rdi +; BMI1-SLOW-NEXT: movl $8448, %eax # imm = 0x2100 +; BMI1-SLOW-NEXT: bextrq %rax, %rdi, %rax +; BMI1-SLOW-NEXT: retq +; +; BMI2-SLOW-LABEL: bextr64d: +; BMI2-SLOW: # %bb.0: # %entry +; BMI2-SLOW-NEXT: shrq $2, %rdi +; BMI2-SLOW-NEXT: movb $33, %al +; BMI2-SLOW-NEXT: bzhiq %rax, %rdi, %rax +; BMI2-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: bextr64d: +; BEXTR-FAST: # %bb.0: # %entry +; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102 +; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax +; BEXTR-FAST-NEXT: retq entry: %shr = lshr i64 %a, 2 %and = and i64 %shr, 8589934591 diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll index 945ff82..be6f193 100644 --- 
a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW-BEXTR +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW-BEXTR +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST-BEXTR define i32 @andn32(i32 %x, i32 %y) { ; X86-LABEL: andn32: @@ -342,17 +344,31 @@ define i32 @bextr32_load(i32* %x, i32 %y) { } define i32 @bextr32b(i32 %x) uwtable ssp { -; X86-LABEL: bextr32b: -; X86: # %bb.0: -; X86-NEXT: movl $3076, %eax # imm = 0xC04 -; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl -; -; X64-LABEL: bextr32b: -; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, %edi, %eax -; X64-NEXT: retq +; X86-SLOW-BEXTR-LABEL: bextr32b: +; X86-SLOW-BEXTR: # %bb.0: +; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; 
X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X86-SLOW-BEXTR-NEXT: retl +; +; X64-SLOW-BEXTR-LABEL: bextr32b: +; X64-SLOW-BEXTR: # %bb.0: +; X64-SLOW-BEXTR-NEXT: movl %edi, %eax +; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X64-SLOW-BEXTR-NEXT: retq +; +; X86-FAST-BEXTR-LABEL: bextr32b: +; X86-FAST-BEXTR: # %bb.0: +; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-FAST-BEXTR-NEXT: retl +; +; X64-FAST-BEXTR-LABEL: bextr32b: +; X64-FAST-BEXTR: # %bb.0: +; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax +; X64-FAST-BEXTR-NEXT: retq %1 = lshr i32 %x, 4 %2 = and i32 %1, 4095 ret i32 %2 @@ -376,18 +392,33 @@ define i32 @bextr32_subreg(i32 %x) uwtable ssp { } define i32 @bextr32b_load(i32* %x) uwtable ssp { -; X86-LABEL: bextr32b_load: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $3076, %ecx # imm = 0xC04 -; X86-NEXT: bextrl %ecx, (%eax), %eax -; X86-NEXT: retl -; -; X64-LABEL: bextr32b_load: -; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, (%rdi), %eax -; X64-NEXT: retq +; X86-SLOW-BEXTR-LABEL: bextr32b_load: +; X86-SLOW-BEXTR: # %bb.0: +; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax +; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X86-SLOW-BEXTR-NEXT: retl +; +; X64-SLOW-BEXTR-LABEL: bextr32b_load: +; X64-SLOW-BEXTR: # %bb.0: +; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax +; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X64-SLOW-BEXTR-NEXT: retq +; +; X86-FAST-BEXTR-LABEL: bextr32b_load: +; X86-FAST-BEXTR: # %bb.0: +; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax +; 
X86-FAST-BEXTR-NEXT: retl +; +; X64-FAST-BEXTR-LABEL: bextr32b_load: +; X64-FAST-BEXTR: # %bb.0: +; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax +; X64-FAST-BEXTR-NEXT: retq %1 = load i32, i32* %x %2 = lshr i32 %1, 4 %3 = and i32 %2, 4095 diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll index 6f7fffb..e9a3cec 100644 --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s 
--check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s 
--check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 ; *Please* keep in sync with test/CodeGen/AArch64/extract-bits.ll -- 2.7.4