From 426db6b4eb2e9298598d5c09d27f0cdb27917751 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 22 Apr 2023 14:54:27 +0100 Subject: [PATCH] [X86] LowerBUILD_VECTOR - fold build_vector(undef,freeze(undef)) -> freeze(undef) Noticed while triaging #62286 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +++ llvm/test/CodeGen/X86/build-vector-128.ll | 30 ++++++++++----------- llvm/test/CodeGen/X86/freeze-vector.ll | 45 +++++++++++++------------------ 3 files changed, 36 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 05c8d6c..cda001d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -11259,6 +11259,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (UndefMask.isAllOnes()) return DAG.getUNDEF(VT); + // All undef/freeze(undef) vector. Return a FREEZE UNDEF. + if ((UndefMask | FrozenUndefMask).isAllOnes()) + return DAG.getFreeze(DAG.getUNDEF(VT)); + // If we have multiple FREEZE-UNDEF operands, we are likely going to end up // lowering into a suboptimal insertion sequence. 
Instead, thaw the UNDEF in // our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR, diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll index a14884a..df664a9 100644 --- a/llvm/test/CodeGen/X86/build-vector-128.ll +++ b/llvm/test/CodeGen/X86/build-vector-128.ll @@ -556,30 +556,28 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) { ; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef: ; SSE2-32: # %bb.0: ; SSE2-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE2-32-NEXT: movd %eax, %xmm1 -; SSE2-32-NEXT: xorps %xmm2, %xmm2 -; SSE2-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0] +; SSE2-32-NEXT: xorpd %xmm1, %xmm1 +; SSE2-32-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1] ; SSE2-32-NEXT: paddd %xmm0, %xmm0 -; SSE2-32-NEXT: psubd %xmm0, %xmm2 -; SSE2-32-NEXT: movdqa %xmm2, %xmm0 +; SSE2-32-NEXT: psubd %xmm0, %xmm1 +; SSE2-32-NEXT: movdqa %xmm1, %xmm0 ; SSE2-32-NEXT: psrad $31, %xmm0 -; SSE2-32-NEXT: pxor %xmm0, %xmm2 -; SSE2-32-NEXT: psubd %xmm0, %xmm2 -; SSE2-32-NEXT: movq %xmm2, (%eax) +; SSE2-32-NEXT: pxor %xmm0, %xmm1 +; SSE2-32-NEXT: psubd %xmm0, %xmm1 +; SSE2-32-NEXT: movq %xmm1, (%eax) ; SSE2-32-NEXT: retl ; ; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef: ; SSE2-64: # %bb.0: -; SSE2-64-NEXT: movd %eax, %xmm1 -; SSE2-64-NEXT: xorps %xmm2, %xmm2 -; SSE2-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0] +; SSE2-64-NEXT: xorpd %xmm1, %xmm1 +; SSE2-64-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1] ; SSE2-64-NEXT: paddd %xmm0, %xmm0 -; SSE2-64-NEXT: psubd %xmm0, %xmm2 -; SSE2-64-NEXT: movdqa %xmm2, %xmm0 +; SSE2-64-NEXT: psubd %xmm0, %xmm1 +; SSE2-64-NEXT: movdqa %xmm1, %xmm0 ; SSE2-64-NEXT: psrad $31, %xmm0 -; SSE2-64-NEXT: pxor %xmm0, %xmm2 -; SSE2-64-NEXT: psubd %xmm0, %xmm2 -; SSE2-64-NEXT: movq %xmm2, (%rdi) +; SSE2-64-NEXT: pxor %xmm0, %xmm1 +; SSE2-64-NEXT: psubd %xmm0, %xmm1 +; SSE2-64-NEXT: movq %xmm1, (%rdi) ; SSE2-64-NEXT: retq ; ; SSE41-32-LABEL: 
pr60168_buildvector_of_zeros_and_undef: diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll index 4139d97..5f060fb 100644 --- a/llvm/test/CodeGen/X86/freeze-vector.ll +++ b/llvm/test/CodeGen/X86/freeze-vector.ll @@ -353,11 +353,9 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst ; X86-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovdqa %xmm0, (%ecx) -; X86-NEXT: vmovd %eax, %xmm0 -; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X86-NEXT: vmovd %edx, %xmm2 -; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] -; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7] +; X86-NEXT: vmovd %edx, %xmm0 +; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7] ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovdqa %xmm0, (%eax) ; X86-NEXT: retl @@ -372,9 +370,7 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst ; X64-NEXT: vmovdqa %xmm0, (%rdx) ; X64-NEXT: vmovd %eax, %xmm0 ; X64-NEXT: vpbroadcastd %xmm0, %xmm0 -; X64-NEXT: vmovd %eax, %xmm2 -; X64-NEXT: vpbroadcastd %xmm2, %xmm2 -; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2],xmm0[3] +; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2,3] ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0 ; X64-NEXT: vmovdqa %xmm0, (%rcx) ; X64-NEXT: retq @@ -401,15 +397,13 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1, ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl (%edx), %edx ; X86-NEXT: andl $15, %edx -; X86-NEXT: vmovd %eax, %xmm0 -; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X86-NEXT: vmovd %edx, %xmm1 -; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1] -; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7] +; X86-NEXT: vmovd %edx, %xmm0 +; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] +; X86-NEXT: vpblendw {{.*#+}} xmm1 = 
xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] ; X86-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7] -; X86-NEXT: vpand %xmm2, %xmm0, %xmm0 -; X86-NEXT: vmovdqa %xmm0, (%ecx) -; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1] +; X86-NEXT: vpand %xmm2, %xmm1, %xmm1 +; X86-NEXT: vmovdqa %xmm1, (%ecx) +; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0 ; X86-NEXT: vmovdqa %xmm0, (%eax) ; X86-NEXT: retl @@ -420,13 +414,11 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1, ; X64-NEXT: andl $15, %eax ; X64-NEXT: vmovd %eax, %xmm0 ; X64-NEXT: vpbroadcastd %xmm0, %xmm0 -; X64-NEXT: vmovd %eax, %xmm1 -; X64-NEXT: vpbroadcastd %xmm1, %xmm1 -; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] +; X64-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1,2,3] ; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7] +; X64-NEXT: vpand %xmm2, %xmm1, %xmm1 +; X64-NEXT: vmovdqa %xmm1, (%rdx) ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0 -; X64-NEXT: vmovdqa %xmm0, (%rdx) -; X64-NEXT: vpand %xmm2, %xmm1, %xmm0 ; X64-NEXT: vmovdqa %xmm0, (%rcx) ; X64-NEXT: retq %i0.src = load i32, ptr %origin0 @@ -451,14 +443,13 @@ define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, p ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl (%edx), %edx ; X86-NEXT: andl $15, %edx -; X86-NEXT: vmovd %eax, %xmm0 -; X86-NEXT: vmovd %edx, %xmm1 -; X86-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X86-NEXT: vmovd %edx, %xmm0 +; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6,7] ; X86-NEXT: vmovddup {{.*#+}} xmm2 = [7,7] ; X86-NEXT: # xmm2 = mem[0,0] -; X86-NEXT: vpand %xmm2, %xmm0, %xmm0 -; X86-NEXT: vmovdqa %xmm0, (%ecx) -; X86-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] +; X86-NEXT: vpand %xmm2, %xmm1, %xmm1 +; X86-NEXT: vmovdqa %xmm1, (%ecx) +; X86-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0 ; 
X86-NEXT: vmovdqa %xmm0, (%eax) ; X86-NEXT: retl -- 2.7.4