From: Craig Topper Date: Wed, 7 Feb 2018 21:41:50 +0000 (+0000) Subject: [X86] When doing callee save/restore for k-registers make sure we don't use KMOVQ... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8baa9c77e3b2c063c8fa26627787730a610ce4f4;p=platform%2Fupstream%2Fllvm.git [X86] When doing callee save/restore for k-registers make sure we don't use KMOVQ on non-BWI targets If we are saving/restoring k-registers, the default behavior of getMinimalPhysRegClass will find the VK64 class with a spill size of 64 bits. This will cause the KMOVQ opcode to be used for save/restore. If we don't have BWI instructions we need to constrain the class returned to give us VK16 with a 16-bit spill size. We can do this by passing either v16i1 or v64i1 into getMinimalPhysRegClass. Also add asserts to make sure BWI is enabled anytime we use KMOVD/KMOVQ. These are what caught this bug. Fixes PR36256 Differential Revision: https://reviews.llvm.org/D42989 llvm-svn: 324533 --- diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 729bd17..36f29b0 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1925,7 +1925,12 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + // If this is k-register make sure we lookup via the largest legal type. + MVT VT = MVT::Other; + if (X86::VK16RegClass.contains(Reg)) + VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; + + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); unsigned Size = TRI->getSpillSize(*RC); unsigned Align = TRI->getSpillAlignment(*RC); // ensure alignment @@ -1992,9 +1997,15 @@ bool X86FrameLowering::spillCalleeSavedRegisters( unsigned Reg = CSI[i-1].getReg(); if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; + + // If this is k-register make sure we lookup via the largest legal type. + MVT VT = MVT::Other; + if (X86::VK16RegClass.contains(Reg)) + VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; + // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC, TRI); @@ -2068,7 +2079,12 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, X86::GR32RegClass.contains(Reg)) continue; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + // If this is k-register make sure we lookup via the largest legal type. + MVT VT = MVT::Other; + if (X86::VK16RegClass.contains(Reg)) + VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; + + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 77310f0..19e9f30 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6919,8 +6919,10 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); if (X86::RFP32RegClass.hasSubClassEq(RC)) return load ?
X86::LD_Fp32m : X86::ST_Fp32m; - if (X86::VK32RegClass.hasSubClassEq(RC)) + if (X86::VK32RegClass.hasSubClassEq(RC)) { + assert(STI.hasBWI() && "KMOVD requires BWI"); return load ? X86::KMOVDkm : X86::KMOVDmk; + } llvm_unreachable("Unknown 4-byte regclass"); case 8: if (X86::GR64RegClass.hasSubClassEq(RC)) @@ -6933,8 +6935,10 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; if (X86::RFP64RegClass.hasSubClassEq(RC)) return load ? X86::LD_Fp64m : X86::ST_Fp64m; - if (X86::VK64RegClass.hasSubClassEq(RC)) + if (X86::VK64RegClass.hasSubClassEq(RC)) { + assert(STI.hasBWI() && "KMOVQ requires BWI"); return load ? X86::KMOVQkm : X86::KMOVQmk; + } llvm_unreachable("Unknown 8-byte regclass"); case 10: assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll index 96879b0..61718fb 100644 --- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll +++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll @@ -181,111 +181,217 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl ; WIN32-NEXT: calll _func_float16 ; WIN32-NEXT: retl ; -; WIN64-LABEL: test_prolog_epilog: -; WIN64: # %bb.0: -; WIN64-NEXT: pushq %rbp -; WIN64-NEXT: subq $1328, %rsp # imm = 0x530 -; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp -; WIN64-NEXT: kmovq %k7, 1192(%rbp) # 8-byte Spill -; WIN64-NEXT: kmovq %k6, 1184(%rbp) # 8-byte Spill -; WIN64-NEXT: kmovq %k5, 1176(%rbp) # 8-byte Spill -; WIN64-NEXT: kmovq %k4, 1168(%rbp) # 8-byte Spill -; WIN64-NEXT: vmovaps %zmm21, 1056(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm20, 960(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm17, 768(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm16, 704(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm15, 640(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm14, 576(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm13, 512(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm12, 448(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm11, 384(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm10, 320(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm9, 256(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm8, 192(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm7, 128(%rbp) # 64-byte Spill -; WIN64-NEXT: vmovaps %zmm6, 64(%rbp) # 64-byte Spill -; WIN64-NEXT: andq $-64, %rsp -; WIN64-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp) -; WIN64-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx -; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx -; WIN64-NEXT: callq func_float16 -; WIN64-NEXT: vmovaps 64(%rbp), %zmm6 # 64-byte Reload -; WIN64-NEXT: vmovaps 128(%rbp), %zmm7 # 64-byte Reload -; WIN64-NEXT: vmovaps 192(%rbp), %zmm8 # 64-byte Reload -; WIN64-NEXT: vmovaps 256(%rbp), %zmm9 # 64-byte Reload -; WIN64-NEXT: vmovaps 320(%rbp), %zmm10 # 64-byte Reload -; WIN64-NEXT: vmovaps 384(%rbp), %zmm11 # 64-byte Reload -; WIN64-NEXT: vmovaps 448(%rbp), %zmm12 # 64-byte Reload -; WIN64-NEXT: vmovaps 512(%rbp), %zmm13 # 64-byte Reload -; WIN64-NEXT: vmovaps 576(%rbp), %zmm14 # 64-byte Reload -; WIN64-NEXT: vmovaps 640(%rbp), %zmm15 # 64-byte Reload -; WIN64-NEXT: vmovaps 704(%rbp), %zmm16 # 64-byte Reload -; WIN64-NEXT: vmovaps 768(%rbp), %zmm17 # 64-byte Reload -; WIN64-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload -; WIN64-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte 
Reload -; WIN64-NEXT: vmovaps 960(%rbp), %zmm20 # 64-byte Reload -; WIN64-NEXT: vmovaps 1056(%rbp), %zmm21 # 64-byte Reload -; WIN64-NEXT: kmovq 1168(%rbp), %k4 # 8-byte Reload -; WIN64-NEXT: kmovq 1176(%rbp), %k5 # 8-byte Reload -; WIN64-NEXT: kmovq 1184(%rbp), %k6 # 8-byte Reload -; WIN64-NEXT: kmovq 1192(%rbp), %k7 # 8-byte Reload -; WIN64-NEXT: leaq 1200(%rbp), %rsp -; WIN64-NEXT: popq %rbp -; WIN64-NEXT: retq +; WIN64-KNL-LABEL: test_prolog_epilog: +; WIN64-KNL: # %bb.0: +; WIN64-KNL-NEXT: pushq %rbp +; WIN64-KNL-NEXT: subq $1328, %rsp # imm = 0x530 +; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; WIN64-KNL-NEXT: kmovw %k7, 1198(%rbp) # 2-byte Spill +; WIN64-KNL-NEXT: kmovw %k6, 1196(%rbp) # 2-byte Spill +; WIN64-KNL-NEXT: kmovw %k5, 1194(%rbp) # 2-byte Spill +; WIN64-KNL-NEXT: kmovw %k4, 1192(%rbp) # 2-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm21, 1104(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm20, 992(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm17, 768(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm16, 704(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm15, 640(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm14, 576(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm13, 512(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm12, 448(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm11, 384(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm10, 320(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm9, 256(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm8, 192(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm7, 128(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: vmovaps %zmm6, 64(%rbp) # 64-byte Spill +; WIN64-KNL-NEXT: andq $-64, %rsp +; WIN64-KNL-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp) +; WIN64-KNL-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) +; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-KNL-NEXT: callq func_float16 +; WIN64-KNL-NEXT: vmovaps 64(%rbp), %zmm6 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 128(%rbp), %zmm7 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 192(%rbp), %zmm8 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 256(%rbp), %zmm9 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 320(%rbp), %zmm10 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 384(%rbp), %zmm11 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 448(%rbp), %zmm12 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 512(%rbp), %zmm13 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 576(%rbp), %zmm14 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 640(%rbp), %zmm15 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 704(%rbp), %zmm16 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 768(%rbp), %zmm17 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 992(%rbp), %zmm20 # 64-byte Reload +; WIN64-KNL-NEXT: vmovaps 1104(%rbp), %zmm21 # 64-byte Reload +; WIN64-KNL-NEXT: kmovw 1192(%rbp), %k4 # 2-byte Reload +; WIN64-KNL-NEXT: kmovw 1194(%rbp), %k5 # 2-byte Reload +; WIN64-KNL-NEXT: kmovw 1196(%rbp), %k6 # 2-byte Reload +; WIN64-KNL-NEXT: kmovw 1198(%rbp), %k7 # 2-byte Reload +; WIN64-KNL-NEXT: leaq 1200(%rbp), %rsp +; WIN64-KNL-NEXT: popq %rbp +; WIN64-KNL-NEXT: retq ; -; X64-LABEL: test_prolog_epilog: -; X64: ## %bb.0: -; X64-NEXT: pushq %rsi -; X64-NEXT: pushq %rdi -; X64-NEXT: subq $1192, %rsp ## imm = 0x4A8 -; X64-NEXT: kmovq %k7, 
{{[0-9]+}}(%rsp) ## 8-byte Spill -; X64-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill -; X64-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill -; X64-NEXT: kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill -; X64-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill -; X64-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill -; X64-NEXT: callq _func_float16 -; X64-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload -; X64-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload -; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload -; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload -; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload -; X64-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload -; X64-NEXT: addq $1192, %rsp ## imm = 0x4A8 -; X64-NEXT: popq %rdi -; X64-NEXT: popq %rsi -; X64-NEXT: retq +; WIN64-SKX-LABEL: test_prolog_epilog: +; WIN64-SKX: # %bb.0: +; WIN64-SKX-NEXT: pushq %rbp +; WIN64-SKX-NEXT: subq $1328, %rsp # imm = 0x530 +; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; WIN64-SKX-NEXT: kmovq %k7, 1192(%rbp) # 8-byte Spill +; WIN64-SKX-NEXT: kmovq %k6, 1184(%rbp) # 8-byte Spill +; WIN64-SKX-NEXT: kmovq %k5, 1176(%rbp) # 8-byte Spill +; WIN64-SKX-NEXT: kmovq %k4, 1168(%rbp) # 8-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm21, 1056(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm20, 960(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm17, 768(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm16, 704(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm15, 640(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm14, 576(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm13, 512(%rbp) # 64-byte Spill 
+; WIN64-SKX-NEXT: vmovaps %zmm12, 448(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm11, 384(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm10, 320(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm9, 256(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm8, 192(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm7, 128(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: vmovaps %zmm6, 64(%rbp) # 64-byte Spill +; WIN64-SKX-NEXT: andq $-64, %rsp +; WIN64-SKX-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp) +; WIN64-SKX-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) +; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-SKX-NEXT: callq func_float16 +; WIN64-SKX-NEXT: vmovaps 64(%rbp), %zmm6 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 128(%rbp), %zmm7 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 192(%rbp), %zmm8 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 256(%rbp), %zmm9 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 320(%rbp), %zmm10 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 384(%rbp), %zmm11 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 448(%rbp), %zmm12 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 512(%rbp), %zmm13 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 576(%rbp), %zmm14 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 640(%rbp), %zmm15 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 704(%rbp), %zmm16 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 768(%rbp), %zmm17 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 960(%rbp), %zmm20 # 64-byte Reload +; WIN64-SKX-NEXT: vmovaps 1056(%rbp), %zmm21 # 64-byte Reload +; WIN64-SKX-NEXT: kmovq 1168(%rbp), %k4 # 8-byte Reload +; WIN64-SKX-NEXT: kmovq 1176(%rbp), %k5 # 8-byte Reload +; WIN64-SKX-NEXT: kmovq 1184(%rbp), %k6 # 8-byte Reload +; WIN64-SKX-NEXT: kmovq 1192(%rbp), %k7 # 8-byte Reload +; WIN64-SKX-NEXT: leaq 1200(%rbp), %rsp +; WIN64-SKX-NEXT: popq %rbp +; WIN64-SKX-NEXT: retq +; +; X64-KNL-LABEL: test_prolog_epilog: +; X64-KNL: ## %bb.0: +; X64-KNL-NEXT: pushq %rsi +; X64-KNL-NEXT: pushq %rdi +; X64-KNL-NEXT: subq $1064, %rsp ## imm = 0x428 +; X64-KNL-NEXT: kmovw %k7, {{[0-9]+}}(%rsp) ## 2-byte Spill +; X64-KNL-NEXT: kmovw %k6, {{[0-9]+}}(%rsp) ## 2-byte Spill +; X64-KNL-NEXT: kmovw %k5, {{[0-9]+}}(%rsp) ## 2-byte Spill +; X64-KNL-NEXT: kmovw %k4, {{[0-9]+}}(%rsp) ## 2-byte Spill +; X64-KNL-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-KNL-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill +; X64-KNL-NEXT: callq _func_float16 +; X64-KNL-NEXT: vmovups (%rsp), %zmm16 ## 
64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload +; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload +; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k4 ## 2-byte Reload +; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k5 ## 2-byte Reload +; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k6 ## 2-byte Reload +; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k7 ## 2-byte Reload +; X64-KNL-NEXT: addq $1064, %rsp ## imm = 0x428 +; X64-KNL-NEXT: popq %rdi +; X64-KNL-NEXT: popq %rsi +; X64-KNL-NEXT: retq +; +; X64-SKX-LABEL: test_prolog_epilog: +; X64-SKX: ## %bb.0: +; X64-SKX-NEXT: pushq %rsi +; X64-SKX-NEXT: pushq %rdi +; X64-SKX-NEXT: subq $1192, %rsp ## imm = 0x4A8 +; X64-SKX-NEXT: kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill +; X64-SKX-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill +; X64-SKX-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill +; X64-SKX-NEXT: kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill +; X64-SKX-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill +; X64-SKX-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill +; X64-SKX-NEXT: callq _func_float16 +; X64-SKX-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload +; X64-SKX-NEXT: vmovups 
{{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload +; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload +; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload +; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload +; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload +; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload +; X64-SKX-NEXT: addq $1192, %rsp ## imm = 0x4A8 +; X64-SKX-NEXT: popq %rdi +; X64-SKX-NEXT: popq %rsi +; X64-SKX-NEXT: retq %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b) ret <16 x float> %c } diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll index 6149353..8188479 100644 --- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll +++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll @@ -27,26 +27,26 @@ define x86_intrcc void @foo(i8* %frame) { ; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 72 ; CHECK64-KNL-NEXT: pushq %rcx ## encoding: [0x51] ; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 80 -; CHECK64-KNL-NEXT: subq $2160, %rsp ## encoding: [0x48,0x81,0xec,0x70,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: ## imm = 0x870 -; CHECK64-KNL-NEXT: kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq %k3, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq %k2, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq %k1, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: subq $2096, %rsp ## encoding: [0x48,0x81,0xec,0x30,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: ## imm = 0x830 +; CHECK64-KNL-NEXT: kmovw %k7, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw %k6, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw %k5, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw %k4, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw %k3, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw 
%k2, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x08,0x00,0x00] ; CHECK64-KNL-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill -; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00] +; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f] ; CHECK64-KNL-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill ; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e] ; CHECK64-KNL-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill @@ -109,7 +109,7 @@ define x86_intrcc void @foo(i8* %frame) { ; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] ; CHECK64-KNL-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill ; CHECK64-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] -; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 2240 +; CHECK64-KNL-NEXT: .cfi_def_cfa_offset 2176 ; CHECK64-KNL-NEXT: .cfi_offset %rcx, -80 ; CHECK64-KNL-NEXT: .cfi_offset %rdx, -72 ; CHECK64-KNL-NEXT: .cfi_offset %rsi, -64 @@ -119,46 +119,46 @@ define x86_intrcc void @foo(i8* %frame) { ; CHECK64-KNL-NEXT: .cfi_offset %r10, -32 ; CHECK64-KNL-NEXT: .cfi_offset %r11, -24 ; CHECK64-KNL-NEXT: .cfi_offset %rax, -16 -; CHECK64-KNL-NEXT: .cfi_offset %xmm0, -2240 -; CHECK64-KNL-NEXT: .cfi_offset %xmm1, -2176 -; CHECK64-KNL-NEXT: .cfi_offset %xmm2, -2112 -; CHECK64-KNL-NEXT: .cfi_offset %xmm3, -2048 -; CHECK64-KNL-NEXT: .cfi_offset %xmm4, -1984 -; CHECK64-KNL-NEXT: .cfi_offset %xmm5, -1920 -; CHECK64-KNL-NEXT: .cfi_offset %xmm6, -1856 -; CHECK64-KNL-NEXT: .cfi_offset %xmm7, -1792 -; CHECK64-KNL-NEXT: .cfi_offset %xmm8, -1728 -; CHECK64-KNL-NEXT: .cfi_offset %xmm9, -1664 -; CHECK64-KNL-NEXT: .cfi_offset %xmm10, -1600 -; CHECK64-KNL-NEXT: .cfi_offset %xmm11, -1536 -; CHECK64-KNL-NEXT: .cfi_offset %xmm12, -1472 -; CHECK64-KNL-NEXT: .cfi_offset %xmm13, -1408 -; CHECK64-KNL-NEXT: .cfi_offset %xmm14, -1344 -; CHECK64-KNL-NEXT: .cfi_offset %xmm15, -1280 -; CHECK64-KNL-NEXT: .cfi_offset %xmm16, -1216 -; CHECK64-KNL-NEXT: .cfi_offset %xmm17, -1152 -; CHECK64-KNL-NEXT: .cfi_offset %xmm18, -1088 -; CHECK64-KNL-NEXT: .cfi_offset %xmm19, -1024 -; CHECK64-KNL-NEXT: .cfi_offset %xmm20, -960 -; CHECK64-KNL-NEXT: .cfi_offset %xmm21, -896 -; CHECK64-KNL-NEXT: .cfi_offset %xmm22, -832 -; CHECK64-KNL-NEXT: .cfi_offset %xmm23, -768 -; CHECK64-KNL-NEXT: .cfi_offset %xmm24, -704 -; CHECK64-KNL-NEXT: .cfi_offset %xmm25, -640 -; CHECK64-KNL-NEXT: .cfi_offset %xmm26, -576 -; CHECK64-KNL-NEXT: .cfi_offset %xmm27, -512 -; CHECK64-KNL-NEXT: .cfi_offset %xmm28, -448 -; CHECK64-KNL-NEXT: .cfi_offset %xmm29, -384 -; CHECK64-KNL-NEXT: .cfi_offset %xmm30, -320 -; CHECK64-KNL-NEXT: .cfi_offset %xmm31, -224 -; CHECK64-KNL-NEXT: .cfi_offset %k0, -144 -; CHECK64-KNL-NEXT: .cfi_offset %k1, -136 -; CHECK64-KNL-NEXT: .cfi_offset %k2, -128 -; CHECK64-KNL-NEXT: .cfi_offset %k3, -120 -; CHECK64-KNL-NEXT: .cfi_offset %k4, -112 -; CHECK64-KNL-NEXT: .cfi_offset %k5, -104 -; CHECK64-KNL-NEXT: .cfi_offset %k6, -96 -; CHECK64-KNL-NEXT: .cfi_offset %k7, -88 +; CHECK64-KNL-NEXT: .cfi_offset %xmm0, -2176 +; CHECK64-KNL-NEXT: .cfi_offset %xmm1, -2112 +; CHECK64-KNL-NEXT: .cfi_offset %xmm2, -2048 +; CHECK64-KNL-NEXT: .cfi_offset %xmm3, -1984 +; 
CHECK64-KNL-NEXT: .cfi_offset %xmm4, -1920 +; CHECK64-KNL-NEXT: .cfi_offset %xmm5, -1856 +; CHECK64-KNL-NEXT: .cfi_offset %xmm6, -1792 +; CHECK64-KNL-NEXT: .cfi_offset %xmm7, -1728 +; CHECK64-KNL-NEXT: .cfi_offset %xmm8, -1664 +; CHECK64-KNL-NEXT: .cfi_offset %xmm9, -1600 +; CHECK64-KNL-NEXT: .cfi_offset %xmm10, -1536 +; CHECK64-KNL-NEXT: .cfi_offset %xmm11, -1472 +; CHECK64-KNL-NEXT: .cfi_offset %xmm12, -1408 +; CHECK64-KNL-NEXT: .cfi_offset %xmm13, -1344 +; CHECK64-KNL-NEXT: .cfi_offset %xmm14, -1280 +; CHECK64-KNL-NEXT: .cfi_offset %xmm15, -1216 +; CHECK64-KNL-NEXT: .cfi_offset %xmm16, -1152 +; CHECK64-KNL-NEXT: .cfi_offset %xmm17, -1088 +; CHECK64-KNL-NEXT: .cfi_offset %xmm18, -1024 +; CHECK64-KNL-NEXT: .cfi_offset %xmm19, -960 +; CHECK64-KNL-NEXT: .cfi_offset %xmm20, -896 +; CHECK64-KNL-NEXT: .cfi_offset %xmm21, -832 +; CHECK64-KNL-NEXT: .cfi_offset %xmm22, -768 +; CHECK64-KNL-NEXT: .cfi_offset %xmm23, -704 +; CHECK64-KNL-NEXT: .cfi_offset %xmm24, -640 +; CHECK64-KNL-NEXT: .cfi_offset %xmm25, -576 +; CHECK64-KNL-NEXT: .cfi_offset %xmm26, -512 +; CHECK64-KNL-NEXT: .cfi_offset %xmm27, -448 +; CHECK64-KNL-NEXT: .cfi_offset %xmm28, -384 +; CHECK64-KNL-NEXT: .cfi_offset %xmm29, -320 +; CHECK64-KNL-NEXT: .cfi_offset %xmm30, -256 +; CHECK64-KNL-NEXT: .cfi_offset %xmm31, -192 +; CHECK64-KNL-NEXT: .cfi_offset %k0, -96 +; CHECK64-KNL-NEXT: .cfi_offset %k1, -94 +; CHECK64-KNL-NEXT: .cfi_offset %k2, -92 +; CHECK64-KNL-NEXT: .cfi_offset %k3, -90 +; CHECK64-KNL-NEXT: .cfi_offset %k4, -88 +; CHECK64-KNL-NEXT: .cfi_offset %k5, -86 +; CHECK64-KNL-NEXT: .cfi_offset %k6, -84 +; CHECK64-KNL-NEXT: .cfi_offset %k7, -82 ; CHECK64-KNL-NEXT: cld ## encoding: [0xfc] ; CHECK64-KNL-NEXT: callq _bar ## encoding: [0xe8,A,A,A,A] ; CHECK64-KNL-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 @@ -225,25 +225,25 @@ define x86_intrcc void @foo(i8* %frame) { ; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload ; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e] ; CHECK64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k2 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k3 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload -; CHECK64-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: addq $2160, %rsp ## encoding: [0x48,0x81,0xc4,0x70,0x08,0x00,0x00] -; CHECK64-KNL-NEXT: ## imm = 0x870 +; CHECK64-KNL-NEXT: ## encoding: 
[0x62,0x61,0x7c,0x48,0x10,0x7c,0x24,0x1f] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k3 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k4 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k5 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k6 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k7 ## 2-byte Reload +; CHECK64-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: addq $2096, %rsp ## encoding: [0x48,0x81,0xc4,0x30,0x08,0x00,0x00] +; CHECK64-KNL-NEXT: ## imm = 0x830 ; CHECK64-KNL-NEXT: popq %rcx ## encoding: [0x59] ; CHECK64-KNL-NEXT: popq %rdx ## encoding: [0x5a] ; CHECK64-KNL-NEXT: popq %rsi ## encoding: [0x5e] @@ -512,26 +512,26 @@ define x86_intrcc void @foo(i8* %frame) { ; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 12 ; CHECK32-KNL-NEXT: pushl %eax ## encoding: [0x50] ; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 16 -; CHECK32-KNL-NEXT: subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: ## imm = 0x270 -; CHECK32-KNL-NEXT: kmovq %k7, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq %k6, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq %k5, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq %k4, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq %k3, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq %k2, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq %k1, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: subl $560, %esp ## encoding: [0x81,0xec,0x30,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## imm = 0x230 +; CHECK32-KNL-NEXT: kmovw %k7, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw %k6, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw %k5, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: 
[0xc5,0xf8,0x91,0xac,0x24,0x2a,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw %k4, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw %k3, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw %k2, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw %k1, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw %k0, {{[0-9]+}}(%esp) ## 2-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x02,0x00,0x00] ; CHECK32-KNL-NEXT: vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07] ; CHECK32-KNL-NEXT: vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill ; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06] ; CHECK32-KNL-NEXT: vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill @@ -546,26 +546,26 @@ define x86_intrcc void @foo(i8* %frame) { ; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] ; CHECK32-KNL-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill ; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] -; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 640 +; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 576 ; CHECK32-KNL-NEXT: .cfi_offset %eax, -16 ; CHECK32-KNL-NEXT: .cfi_offset %ecx, -12 ; CHECK32-KNL-NEXT: .cfi_offset %edx, -8 -; CHECK32-KNL-NEXT: .cfi_offset %xmm0, -640 -; CHECK32-KNL-NEXT: .cfi_offset %xmm1, -576 -; CHECK32-KNL-NEXT: .cfi_offset %xmm2, -512 -; CHECK32-KNL-NEXT: .cfi_offset %xmm3, -448 -; CHECK32-KNL-NEXT: .cfi_offset %xmm4, -384 -; CHECK32-KNL-NEXT: .cfi_offset %xmm5, -320 -; CHECK32-KNL-NEXT: .cfi_offset %xmm6, -256 -; CHECK32-KNL-NEXT: .cfi_offset %xmm7, -160 -; CHECK32-KNL-NEXT: .cfi_offset %k0, -80 -; CHECK32-KNL-NEXT: .cfi_offset %k1, -72 -; CHECK32-KNL-NEXT: .cfi_offset %k2, -64 -; CHECK32-KNL-NEXT: .cfi_offset %k3, -56 -; CHECK32-KNL-NEXT: .cfi_offset %k4, -48 -; CHECK32-KNL-NEXT: .cfi_offset %k5, -40 -; CHECK32-KNL-NEXT: .cfi_offset %k6, -32 -; CHECK32-KNL-NEXT: .cfi_offset %k7, -24 +; CHECK32-KNL-NEXT: .cfi_offset %xmm0, -576 +; CHECK32-KNL-NEXT: .cfi_offset %xmm1, -512 +; CHECK32-KNL-NEXT: .cfi_offset %xmm2, -448 +; CHECK32-KNL-NEXT: .cfi_offset %xmm3, -384 +; CHECK32-KNL-NEXT: .cfi_offset %xmm4, -320 +; CHECK32-KNL-NEXT: .cfi_offset %xmm5, -256 +; CHECK32-KNL-NEXT: .cfi_offset %xmm6, -192 +; CHECK32-KNL-NEXT: .cfi_offset %xmm7, -128 +; CHECK32-KNL-NEXT: .cfi_offset %k0, -32 +; CHECK32-KNL-NEXT: .cfi_offset %k1, -30 +; CHECK32-KNL-NEXT: .cfi_offset %k2, -28 +; CHECK32-KNL-NEXT: .cfi_offset %k3, -26 +; CHECK32-KNL-NEXT: .cfi_offset %k4, -24 +; CHECK32-KNL-NEXT: .cfi_offset %k5, -22 +; CHECK32-KNL-NEXT: .cfi_offset %k6, -20 +; CHECK32-KNL-NEXT: .cfi_offset %k7, -18 ; CHECK32-KNL-NEXT: cld ## encoding: [0xfc] ; CHECK32-KNL-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A] ; CHECK32-KNL-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 @@ -584,25 +584,25 @@ define x86_intrcc void @foo(i8* %frame) { ; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload ; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06] ; CHECK32-KNL-NEXT: vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte 
Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k2 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k3 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k4 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k5 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k6 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: kmovq {{[0-9]+}}(%esp), %k7 ## 8-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: ## imm = 0x270 +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k0 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k2 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k3 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k4 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k5 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k6 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: kmovw {{[0-9]+}}(%esp), %k7 ## 2-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: addl $560, %esp ## encoding: [0x81,0xc4,0x30,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## imm = 0x230 ; CHECK32-KNL-NEXT: popl %eax ## encoding: [0x58] ; CHECK32-KNL-NEXT: popl %ecx ## encoding: [0x59] ; CHECK32-KNL-NEXT: popl %edx ## encoding: [0x5a]
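
A minimal reproducer sketch for PR36256, modeled on the test_prolog_epilog test updated above. This block is illustrative and not part of the commit; the function name, run line, and CHECK lines are assumptions. On a KNL-class target (AVX-512 without BWI), the intel_ocl_bicc convention makes %k4-%k7 callee-saved, so a call through the default C convention forces mask-register spills in the prologue. Before this patch the spill-slot lookup landed on VK64 and selected the BWI-only KMOVQ (now caught by the new asserts); with the fix, KMOVW is selected with a 2-byte slot.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
declare <16 x float> @func_float16(<16 x float>)

; CHECK-LABEL: spill_mask_csrs:
; CHECK: kmovw %k7, {{.*}} ## 2-byte Spill
; CHECK-NOT: kmovq
define intel_ocl_bicc <16 x float> @spill_mask_csrs(<16 x float> %a) nounwind {
  ; The C-convention call below clobbers %k4-%k7, so this function's
  ; prologue/epilogue must save and restore them.
  %r = call <16 x float> @func_float16(<16 x float> %a)
  ret <16 x float> %r
}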