From 5e7815b695d4a21f021f23ed41750a44e9eb3849 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 1 Jul 2019 19:01:37 +0000
Subject: [PATCH] [X86] Correct v4f32->v2i64 cvt(t)ps2(u)qq memory isel
 patterns

These instructions only read 64 bits of memory, so we shouldn't allow
a full-vector-width load to be pattern matched, in case it is marked
volatile. Instead, allow vzload or scalar_to_vector+load.

Also add a DAG combine to turn full vector loads into vzload when used
by one of these instructions, provided the load isn't volatile.

This fixes another case for PR42079

llvm-svn: 364838
---
 llvm/lib/Target/X86/X86ISelLowering.cpp        |  39 +++++
 llvm/lib/Target/X86/X86InstrAVX512.td          |  56 ++++++-
 llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 196 +++++++------------------
 llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll   |   6 +-
 llvm/test/CodeGen/X86/vec_fp_to_int.ll         |   6 +-
 5 files changed, 147 insertions(+), 156 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 71bab13..a47a666 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41144,6 +41144,41 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  EVT VT = N->getValueType(0);
+
+  // Convert a full vector load into vzload when not all bits are needed.
+  SDValue In = N->getOperand(0);
+  MVT InVT = In.getSimpleValueType();
+  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
+      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
+    assert(InVT.is128BitVector() && "Expected 128-bit input vector");
+    LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+    // Unless the load is volatile.
+    if (!LN->isVolatile()) {
+      SDLoc dl(N);
+      unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+      MVT MemVT = MVT::getFloatingPointVT(NumBits);
+      MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+      SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+      SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+      SDValue VZLoad =
+          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
+                                  LN->getPointerInfo(),
+                                  LN->getAlignment(),
+                                  LN->getMemOperand()->getFlags());
+      SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
+                                    DAG.getBitcast(InVT, VZLoad));
+      DCI.CombineTo(N, Convert);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+      return SDValue(N, 0);
+    }
+  }
+
+  return SDValue();
+}
+
 /// Do target-specific dag combines on X86ISD::ANDNP nodes.
 static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
@@ -43940,6 +43975,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FMAXNUM:        return combineFMinNumFMaxNum(N, DAG, Subtarget);
   case X86ISD::CVTSI2P:
   case X86ISD::CVTUI2P:     return combineX86INT_TO_FP(N, DAG, DCI);
+  case X86ISD::CVTP2SI:
+  case X86ISD::CVTP2UI:
+  case X86ISD::CVTTP2SI:
+  case X86ISD::CVTTP2UI:    return combineCVTP2I_CVTTP2I(N, DAG, DCI);
  case X86ISD::BT:          return combineBT(N, DAG, DCI);
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:    return combineZext(N, DAG, DCI, Subtarget);
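For illustration, here is a minimal IR sketch of the case the new combine
handles (illustrative only; these functions are made up for this note and
are not part of the patch). With -mattr=+avx512dq,+avx512vl, the
non-volatile full-width load can now be folded into the conversion as a
64-bit memory operand, while the volatile load must keep its full 128-bit
access:

; Illustrative sketch, not from the patch: a full-width load feeding
; cvtps2qq. The combine above rewrites the non-volatile load into a vzload,
; so isel can fold it into "vcvtps2qq (%rdi), %xmm0".
define <2 x i64> @fold_nonvolatile_load(<4 x float>* %p) {
  %x = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

; The volatile case must not be narrowed: it keeps a separate 128-bit load.
define <2 x i64> @keep_volatile_load(<4 x float>* %p) {
  %x = load volatile <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)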
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2c9b6f1..0b3d0f5 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7979,7 +7979,11 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
   // Explicitly specified broadcast string, since we take only 2 elements
   // from v4f32x_info source
   defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
-                             sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+                             sched.XMM, "{1to2}", "", f64mem, VK2WM,
+                             (v2i64 (OpNode (bc_v4f32
+                                             (v2f64
+                                              (scalar_to_vector (loadf64 addr:$src))))))>,
+                             EVEX_V128;
   defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                              sched.YMM>, EVEX_V256;
 }
@@ -7997,7 +8001,11 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
   // Explicitly specified broadcast string, since we take only 2 elements
   // from v4f32x_info source
   defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
-                             sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+                             sched.XMM, "{1to2}", "", f64mem, VK2WM,
+                             (v2i64 (OpNode (bc_v4f32
+                                             (v2f64
+                                              (scalar_to_vector (loadf64 addr:$src))))))>,
+                             EVEX_V128;
   defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                              sched.YMM>, EVEX_V256;
 }
@@ -8358,6 +8366,50 @@ let Predicates = [HasDQI] in {
 }
 
 let Predicates = [HasDQI, HasVLX] in {
+  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+            (VCVTPS2QQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+            (VCVTPS2UQQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+            (VCVTTPS2QQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+            (VCVTTPS2UQQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
   def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
             (VCVTTPS2QQZ256rr VR128X:$src)>;
   def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
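The test updates below all exercise the same narrow-load shape that the new
Z128 patterns match. A minimal sketch of that shape (illustrative only, not
taken verbatim from the patch; it mirrors the tests that follow): a 64-bit
load widened to v4f32 with zero upper elements (vzload) or undef upper
elements (scalar_to_vector) now selects the 64-bit memory form directly
instead of going through a separate vmovsd:

; Illustrative sketch, not from the patch.
define <2 x i64> @vzload_shape(<2 x float>* %p) {
  %x = load <2 x float>, <2 x float>* %p
  %w = shufflevector <2 x float> %x, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Previously this selected "vmovsd" plus "vcvtps2qq %xmm0, %xmm0"; it now
  ; selects "vcvtps2qq (%rdi), %xmm0", a single 64-bit memory access.
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %w, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)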
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
index 4197f57..431d6f9 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
 
 declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
 
@@ -126,16 +126,12 @@ define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -147,18 +143,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load(<2 x float>* %p, <
 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -170,18 +162,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -194,16 +182,12 @@ define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_2(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -215,18 +199,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_2(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -238,18 +218,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2(<2 x float>* %p
 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -361,16 +337,12 @@ define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -382,18 +354,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -405,18 +373,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -428,16 +392,12 @@ define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_2(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -449,18 +409,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2(<2 x float>* %p
 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -472,18 +428,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2(<2 x float>* %
 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -766,16 +718,12 @@ define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -787,18 +735,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -810,18 +754,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -834,16 +774,12 @@ define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_2(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -855,18 +791,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2(<2 x float>* %p
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -878,18 +810,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2(<2 x float>* %
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -1078,16 +1006,12 @@ define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1099,18 +1023,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1122,18 +1042,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load(<2 x float>* %p
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1146,16 +1062,12 @@ define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_2(<2 x float>* %p) {
 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
-; X86-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -1167,18 +1079,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2(<2 x float>* %
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT:    # xmm1 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT:    # xmm1 = mem[0],zero
-; X64-NEXT:    vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -1190,18 +1098,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2(<2 x float>*
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT:    # xmm0 = mem[0],zero
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT:    vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT:    # xmm0 = mem[0],zero
-; X64-NEXT:    vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %x0 = load <2 x float>, <2 x float>* %p
   %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
index 08cb37e..9541d38 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
@@ -2698,8 +2698,7 @@ define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) {
 ;
 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vcvttps2qq (%rdi), %xmm0
 ; AVX512VLDQ-NEXT:    retq
   %a = load <2 x float>, <2 x float>* %x
   %b = fptosi <2 x float> %a to <2 x i64>
@@ -2787,8 +2786,7 @@ define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) {
 ;
 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vcvttps2uqq (%rdi), %xmm0
 ; AVX512VLDQ-NEXT:    retq
   %a = load <2 x float>, <2 x float>* %x
   %b = fptoui <2 x float> %a to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index d4470d4..071d468 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -2980,8 +2980,7 @@ define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) {
 ;
 ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vcvttps2qq (%rdi), %xmm0
 ; AVX512VLDQ-NEXT:    retq
   %a = load <2 x float>, <2 x float>* %x
   %b = fptosi <2 x float> %a to <2 x i64>
@@ -3069,8 +3068,7 @@ define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) {
 ;
 ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vcvttps2uqq (%rdi), %xmm0
 ; AVX512VLDQ-NEXT:    retq
   %a = load <2 x float>, <2 x float>* %x
   %b = fptoui <2 x float> %a to <2 x i64>
-- 
2.7.4