From 1cbcd8ad2071178e9bb7c0c6b58a19c1283db9e3 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Wed, 21 Dec 2022 04:51:52 +0300
Subject: [PATCH] [X86] avx512fp16: add missing instruction selection patterns
 for "i16" `VMOVSH`

For all other patterns, we consistently have both I and F variants,
let's not diverge.

Fixes https://github.com/llvm/llvm-project/issues/59628
---
Review notes (free-form area; ignored by `git am`):
  * This patch adds v8i16, v16i16 and v32i16 X86vzmovl patterns, but the
    only test it adds (pr59628_xmm) uses <8 x i16>. The 256-bit and
    512-bit patterns get no test in this patch.
  * NOTE(review): the `From:` header has no e-mail address, and the
    `%3 = and <16 x i32> %2,` context line in the avx512fp16-mov.ll hunk
    ends in a dangling comma. Both look like text lost when this copy was
    extracted - TODO confirm against the upstream commit before applying.
  * NOTE(review): leading whitespace inside the hunks was reconstructed
    from a whitespace-collapsed copy - TODO confirm against the upstream
    commit.

 llvm/lib/Target/X86/X86InstrAVX512.td   | 12 ++++++++++++
 llvm/test/CodeGen/X86/avx512fp16-mov.ll | 24 ++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b8b2145..6da4dd2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4705,16 +4705,28 @@ let Predicates = [HasAVX512] in {
 let Predicates = [HasFP16] in {
   def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
             (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
+  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
+            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
 
   // FIXME we need better canonicalization in dag combine
   def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
             (SUBREG_TO_REG (i32 0),
              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
               (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
+  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
+              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
+
+  // FIXME we need better canonicalization in dag combine
   def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
             (SUBREG_TO_REG (i32 0),
              (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
               (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
+  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
+              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
 
   def : Pat<(v8f16 (X86vzload16 addr:$src)),
             (VMOVSHZrm addr:$src)>;
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index 407b84c..09706f0 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -2058,3 +2058,27 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width
   %3 = and <16 x i32> %2,
   ret <16 x i32> %3
 }
+
+define <8 x i16> @pr59628_xmm(i16 %arg) {
+; X64-LABEL: pr59628_xmm:
+; X64:       # %bb.0:
+; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vpbroadcastw %edi, %xmm1
+; X64-NEXT:    vmovsh %xmm1, %xmm0, %xmm0
+; X64-NEXT:    vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
+; X64-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT:    retq
+;
+; X86-LABEL: pr59628_xmm:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vpbroadcastw %eax, %xmm1
+; X86-NEXT:    vmovsh %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1
+; X86-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT:    retl
+  %I1 = insertelement <8 x i16> zeroinitializer, i16 %arg, i16 0
+  %I2 = insertelement <8 x i16> %I1, i16 0, i16 %arg
+  ret <8 x i16> %I2
+}
-- 
2.7.4