From: Simon Pilgrim Date: Wed, 8 Feb 2023 19:55:04 +0000 (+0000) Subject: [DAG] Fold freeze(concat_vectors(x,y,...)) -> concat_vectors(freeze(x),freeze(y)... X-Git-Tag: upstream/17.0.6~18162 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ce63cd3bf1df5e9940c1c5e691029830e5041332;p=platform%2Fupstream%2Fllvm.git [DAG] Fold freeze(concat_vectors(x,y,...)) -> concat_vectors(freeze(x),freeze(y),...) Another of the cleanups necessary for D136529 --- diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 58f2ba0a..172c830 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14711,7 +14711,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { return SDValue(); bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR || - N0.getOpcode() == ISD::BUILD_PAIR; + N0.getOpcode() == ISD::BUILD_PAIR || + N0.getOpcode() == ISD::CONCAT_VECTORS; SmallSetVector MaybePoisonOperands; for (SDValue Op : N0->ops()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 074b19b..0cbca37 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4758,6 +4758,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::AssertSext: case ISD::AssertZext: case ISD::FREEZE: + case ISD::CONCAT_VECTORS: case ISD::INSERT_SUBVECTOR: case ISD::AND: case ISD::OR: diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll index 22fff49..13d1265 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll @@ -30,15 +30,14 @@ define <64 x i8> @add_v64i8_broadcasts(<64 x i8> %a0, i64 %a1, i8 %a2) { ; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3 +; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm3 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3 -; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm4 ; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: add_v64i8_broadcasts: diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll index b032ebf..364b3612 100644 --- a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll @@ -283,17 +283,16 @@ define <32 x i16> @vec512_i16_signed_reg_reg(<32 x i16> %a1, <32 x i16> %a2) nou ; AVX512F-NEXT: vpsubw %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5 +; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2 +; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsubw %ymm2, %ymm5, %ymm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm5 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2 -; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpaddw %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_reg_reg: @@ -311,17 +310,16 @@ define <32 x i16> @vec512_i16_signed_reg_reg(<32 x i16> %a1, <32 x i16> %a2) nou ; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm1, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5 +; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm6, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1 ; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm5, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm5 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2 -; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2 -; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-FALLBACK-NEXT: retq ; ; AVX512BW-LABEL: vec512_i16_signed_reg_reg: @@ -362,17 +360,16 @@ define <32 x i16> @vec512_i16_unsigned_reg_reg(<32 x i16> %a1, <32 x i16> %a2) n ; AVX512F-NEXT: vpsubw %ymm6, %ymm1, %ymm1 ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm4 +; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2 +; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpternlogq $184, %zmm4, %zmm5, %zmm1 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpsubw %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm4, %zmm2 -; AVX512F-NEXT: vpternlogq $216, %zmm5, %zmm1, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpaddw %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-FALLBACK-LABEL: vec512_i16_unsigned_reg_reg: @@ -390,17 +387,16 @@ define <32 x i16> @vec512_i16_unsigned_reg_reg(<32 x i16> %a1, <32 x i16> %a2) n ; AVX512VL-FALLBACK-NEXT: vpsubw %ymm6, %ymm1, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm4 +; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm6, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512VL-FALLBACK-NEXT: vpternlogq $184, %zmm4, %zmm5, %zmm1 ; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm4, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm4, %ymm4 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm4, %zmm2 -; AVX512VL-FALLBACK-NEXT: vpternlogq $216, %zmm5, %zmm1, %zmm2 -; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-FALLBACK-NEXT: retq ; ; AVX512BW-LABEL: vec512_i16_unsigned_reg_reg: @@ -444,17 +440,16 @@ define <32 x i16> @vec512_i16_signed_mem_reg(ptr %a1_addr, <32 x i16> %a2) nounw ; AVX512F-NEXT: vpsubw %ymm5, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5 +; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1 +; AVX512F-NEXT: vpsubw %ymm0, %ymm6, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: vpsubw %ymm0, %ymm5, %ymm5 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1 -; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 -; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_mem_reg: @@ -473,17 +468,16 @@ define <32 x i16> @vec512_i16_signed_mem_reg(ptr %a1_addr, <32 x i16> %a2) nounw ; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm0, %ymm0 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5 +; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm6, %ymm0 ; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0 ; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm5, %ymm5 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1 -; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1 -; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm0 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512VL-FALLBACK-NEXT: retq ; ; AVX512BW-LABEL: vec512_i16_signed_mem_reg: @@ -527,17 +521,16 @@ define <32 x i16> @vec512_i16_signed_reg_mem(<32 x i16> %a1, ptr %a2_addr) nounw ; AVX512F-NEXT: vpsubw %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5 +; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2 +; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsubw %ymm2, %ymm5, %ymm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm5 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2 -; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpaddw %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_reg_mem: @@ -556,17 +549,16 @@ define <32 x i16> @vec512_i16_signed_reg_mem(<32 x i16> %a1, ptr %a2_addr) nounw ; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm1, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5 +; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm6, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1 ; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm5, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm5 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2 -; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2 -; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-FALLBACK-NEXT: retq ; ; AVX512BW-LABEL: vec512_i16_signed_reg_mem: @@ -611,17 +603,16 @@ define <32 x i16> @vec512_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwin ; AVX512F-NEXT: vpsubw %ymm5, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5 +; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1 +; AVX512F-NEXT: vpsubw %ymm0, %ymm6, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: vpsubw %ymm0, %ymm5, %ymm5 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1 -; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 -; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_mem_mem: @@ -641,17 +632,16 @@ define <32 x i16> @vec512_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwin ; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm0, %ymm0 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5 +; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm6, %ymm0 ; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0 ; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm5, %ymm5 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1 -; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1 -; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm0 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 -; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 +; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512VL-FALLBACK-NEXT: retq ; ; AVX512BW-LABEL: vec512_i16_signed_mem_mem: