From 7ef6ea324a0ce651aedd3d3b6bc40a76674b85da Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 5 Dec 2016 04:51:31 +0000 Subject: [PATCH] [AVX-512] Teach fast isel to use masked compare and movss for handling scalar cmp and select sequence when AVX-512 is enabled. This matches the behavior of normal isel. llvm-svn: 288636 --- llvm/lib/Target/X86/X86FastISel.cpp | 73 ++++- llvm/test/CodeGen/X86/fast-isel-select-sse.ll | 436 +++++++++----------------- 2 files changed, 215 insertions(+), 294 deletions(-) diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 0cc0615..44d918f 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -170,6 +170,12 @@ private: const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, X86AddressMode &AM); + + unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill, unsigned Op3, + bool Op3IsKill); }; } // end anonymous namespace. @@ -2180,9 +2186,36 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); unsigned ResultReg; - - if (Subtarget->hasAVX()) { - const TargetRegisterClass *FR32 = &X86::FR32RegClass; + + if (Subtarget->hasAVX512()) { + // If we have AVX512 we can use a mask compare and masked movss/sd. + const TargetRegisterClass *VR128X = &X86::VR128XRegClass; + const TargetRegisterClass *VK1 = &X86::VK1RegClass; + + unsigned CmpOpcode = + (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr; + unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill, + CmpRHSReg, CmpRHSIsKill, CC); + + // Need an IMPLICIT_DEF for the input that is used to generate the upper + // bits of the result register since it's not based on any of the inputs. 
+ unsigned ImplicitDefReg = createResultReg(VR128X); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); + + // Place RHSReg in the passthru of the masked movss/sd operation and put + // LHS in the input. The mask input comes from the compare. + unsigned MovOpcode = + (RetVT.SimpleTy == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk; + unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill, + CmpReg, true, ImplicitDefReg, true, + LHSReg, LHSIsKill); + + ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg); + + } else if (Subtarget->hasAVX()) { const TargetRegisterClass *VR128 = &X86::VR128RegClass; // If we have AVX, create 1 blendv instead of 3 logic instructions. @@ -2195,7 +2228,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { unsigned BlendOpcode = (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr; - unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill, + unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill, CmpRHSReg, CmpRHSIsKill, CC); unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CmpReg, true); @@ -3849,6 +3882,38 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, return true; } +unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill, + unsigned Op3, bool Op3IsKill) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); + Op3 = constrainOperandRegClass(II, Op3, 
II.getNumDefs() + 3); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)) + .addReg(Op3, getKillRegState(Op3IsKill)); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)) + .addReg(Op3, getKillRegState(Op3IsKill)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + namespace llvm { FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, diff --git a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll index 9c24118..502260d 100644 --- a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll +++ b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll @@ -3,8 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -fast-isel -fast-isel-abort=1 -mattr=avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512SLOW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -fast-isel -fast-isel-abort=1 -mattr=avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -fast-isel -fast-isel-abort=1 -mattr=avx512f | FileCheck %s --check-prefix=AVX512 ; Test all cmp predicates that can be used with SSE. 
@@ -23,18 +23,12 @@ define float @select_fcmp_oeq_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_oeq_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpeqss %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_oeq_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_oeq_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp oeq float %a, %b %2 = select i1 %1, float %c, float %d @@ -56,18 +50,12 @@ define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_oeq_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_oeq_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_oeq_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp oeq double %a, %b %2 = select i1 %1, double %c, double %d @@ -90,18 +78,12 @@ define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ogt_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: 
vcmpltss %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ogt_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpltss %xmm0, %xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ogt_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpltss %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ogt float %a, %b %2 = select i1 %1, float %c, float %d @@ -124,18 +106,12 @@ define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ogt_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpltsd %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ogt_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpltsd %xmm0, %xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ogt_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ogt double %a, %b %2 = select i1 %1, double %c, double %d @@ -158,18 +134,12 @@ define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_oge_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpless %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_oge_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpless %xmm0, 
%xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_oge_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpless %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp oge float %a, %b %2 = select i1 %1, float %c, float %d @@ -192,18 +162,12 @@ define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_oge_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmplesd %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_oge_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmplesd %xmm0, %xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_oge_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmplesd %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp oge double %a, %b %2 = select i1 %1, double %c, double %d @@ -225,18 +189,12 @@ define float @select_fcmp_olt_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_olt_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpltss %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_olt_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpltss %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_olt_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; 
AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp olt float %a, %b %2 = select i1 %1, float %c, float %d @@ -258,18 +216,12 @@ define double @select_fcmp_olt_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_olt_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpltsd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_olt_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpltsd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_olt_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpltsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp olt double %a, %b %2 = select i1 %1, double %c, double %d @@ -291,18 +243,12 @@ define float @select_fcmp_ole_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ole_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpless %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ole_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpless %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ole_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpless %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ole float %a, %b %2 = select i1 %1, float %c, float %d @@ -324,18 +270,12 @@ define double @select_fcmp_ole_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd 
%xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ole_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmplesd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ole_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmplesd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ole_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmplesd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ole double %a, %b %2 = select i1 %1, double %c, double %d @@ -357,18 +297,12 @@ define float @select_fcmp_ord_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ord_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpordss %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ord_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ord_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpordss %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ord float %a, %b %2 = select i1 %1, float %c, float %d @@ -390,18 +324,12 @@ define double @select_fcmp_ord_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ord_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpordsd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, 
%xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ord_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ord_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpordsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ord double %a, %b %2 = select i1 %1, double %c, double %d @@ -423,18 +351,12 @@ define float @select_fcmp_uno_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_uno_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpunordss %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_uno_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_uno_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpunordss %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp uno float %a, %b %2 = select i1 %1, float %c, float %d @@ -456,18 +378,12 @@ define double @select_fcmp_uno_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_uno_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpunordsd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_uno_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; 
AVX512-LABEL: select_fcmp_uno_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpunordsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp uno double %a, %b %2 = select i1 %1, double %c, double %d @@ -489,18 +405,12 @@ define float @select_fcmp_ugt_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ugt_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnless %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ugt_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnless %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ugt_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnless %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ugt float %a, %b %2 = select i1 %1, float %c, float %d @@ -522,18 +432,12 @@ define double @select_fcmp_ugt_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ugt_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnlesd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ugt_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnlesd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ugt_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnlesd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ugt double %a, %b %2 = 
select i1 %1, double %c, double %d @@ -555,18 +459,12 @@ define float @select_fcmp_uge_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_uge_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnltss %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_uge_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnltss %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_uge_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnltss %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp uge float %a, %b %2 = select i1 %1, float %c, float %d @@ -588,18 +486,12 @@ define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_uge_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnltsd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_uge_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnltsd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_uge_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnltsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp uge double %a, %b %2 = select i1 %1, double %c, double %d @@ -622,18 +514,12 @@ define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ult_f32: -; 
AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnless %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ult_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnless %xmm0, %xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ult_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnless %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ult float %a, %b %2 = select i1 %1, float %c, float %d @@ -656,18 +542,12 @@ define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ult_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnlesd %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ult_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ult_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ult double %a, %b %2 = select i1 %1, double %c, double %d @@ -690,18 +570,12 @@ define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ule_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnltss %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ule_f32: -; 
AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnltss %xmm0, %xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ule_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnltss %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ule float %a, %b %2 = select i1 %1, float %c, float %d @@ -724,18 +598,12 @@ define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_ule_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpnltsd %xmm0, %xmm1, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_ule_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_ule_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpnltsd %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp ule double %a, %b %2 = select i1 %1, double %c, double %d @@ -757,18 +625,12 @@ define float @select_fcmp_une_f32(float %a, float %b, float %c, float %d) { ; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_une_f32: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpneqss %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovaps %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_une_f32: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_une_f32: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpneqss %xmm1, 
%xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp une float %a, %b %2 = select i1 %1, float %c, float %d @@ -790,18 +652,12 @@ define double @select_fcmp_une_f64(double %a, double %b, double %c, double %d) { ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; AVX-NEXT: retq ; -; AVX512SLOW-LABEL: select_fcmp_une_f64: -; AVX512SLOW: # BB#0: -; AVX512SLOW-NEXT: vcmpneqsd %xmm1, %xmm0, %k1 -; AVX512SLOW-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} -; AVX512SLOW-NEXT: vmovapd %xmm3, %xmm0 -; AVX512SLOW-NEXT: retq -; -; AVX512FAST-LABEL: select_fcmp_une_f64: -; AVX512FAST: # BB#0: -; AVX512FAST-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0 -; AVX512FAST-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512FAST-NEXT: retq +; AVX512-LABEL: select_fcmp_une_f64: +; AVX512: # BB#0: +; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm3 {%k1} +; AVX512-NEXT: vmovapd %xmm3, %xmm0 +; AVX512-NEXT: retq ; %1 = fcmp une double %a, %b %2 = select i1 %1, double %c, double %d -- 2.7.4