From: Craig Topper Date: Sun, 18 Feb 2018 02:37:33 +0000 (+0000) Subject: [X86] Make masked pcmpeq commutable during isel so we can fold loads in other operand... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1040f236a3d9a0dd2db576c94fb93af6d0799940;p=platform%2Fupstream%2Fllvm.git [X86] Make masked pcmpeq commutable during isel so we can fold loads in other operand to the shorter encoding. Previously we used the immediate encoding if the load was in operand 0 and the short encoding if the load was in operand 1. This added an insane number of bytes to the size of the isel table. I'm wondering if we should always use the immediate form during isel and change to the short form during emission. This would remove the need to pattern match every combination for both the immediate form and the short form during isel. We could do the same with vpcmpgt. llvm-svn: 325456 --- diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 05ab4df..395ab8a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2190,9 +2190,9 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, } def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2), - (X86cmpm node:$src1, node:$src2, (i8 0))>; + (X86cmpm_c node:$src1, node:$src2, (i8 0))>; def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), - (X86cmpm node:$src1, node:$src2, (i8 4))>; + (X86cmpm_c node:$src1, node:$src2, (i8 4))>; def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 6))>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 1350054..aa6fcb1 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -171,6 +171,8 @@ def X86CmpMaskCCScalarRound : SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +// Hack to make CMPM commutable in tablegen 
patterns for load folding. +def X86cmpm_c : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPCommutative]>; def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>; diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index 0cb15c3..61b1bea 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1081,7 +1081,7 @@ define i16 @pcmpeq_mem_1(<16 x i32> %a, <16 x i32>* %b) { define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) { ; KNL-LABEL: pcmpeq_mem_2: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x07,0x00] +; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07] ; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -1089,7 +1089,7 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) { ; ; AVX512BW-LABEL: pcmpeq_mem_2: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x07,0x00] +; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07] ; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -1097,7 +1097,7 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) { ; ; SKX-LABEL: pcmpeq_mem_2: ; SKX: ## %bb.0: -; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x07,0x00] +; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07] ; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax ; SKX-NEXT: vzeroupper ## encoding: 
[0xc5,0xf8,0x77]