From 359918dadf4d3c17df33208e403132c371cb2473 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Sep 2019 04:40:58 +0000 Subject: [PATCH] [X86] Enable commuting of EVEX VCMP for all immediate values during isel. llvm-svn: 372065 --- llvm/lib/Target/X86/X86InstrAVX512.td | 29 +++++++++------- llvm/lib/Target/X86/X86InstrInfo.cpp | 21 ++++++++++-- llvm/lib/Target/X86/X86InstrInfo.h | 3 ++ llvm/test/CodeGen/X86/avx512-mask-op.ll | 45 +++++++++---------------- llvm/test/CodeGen/X86/select-of-fp-constants.ll | 2 +- 5 files changed, 55 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index b8c936f..49b7b09 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2462,6 +2462,11 @@ def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), return N->hasOneUse(); }]>; +def X86cmpm_imm_commute : SDNodeXFormgetZExtValue() & 0x1f); + return getI8Imm(Imm, SDLoc(N)); +}]>; + multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -2498,29 +2503,29 @@ multiclass avx512_vcmp_common(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute imm:$cc))>; def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + imm:$cc)), (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute imm:$cc))>; def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (_.VT _.RC:$src1), CommutableCMPCC:$cc), + (_.VT _.RC:$src1), imm:$cc), (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute imm:$cc))>; def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast (_.ScalarLdFrag addr:$src2)), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + imm:$cc)), (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute imm:$cc))>; } multiclass 
avx512_vcmp_sae { @@ -2556,12 +2561,12 @@ defm VCMPPS : avx512_vcmp, // Patterns to select fp compares with load as first operand. let Predicates = [HasAVX512] in { def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, - CommutableCMPCC:$cc)), - (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>; + imm:$cc)), + (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, - CommutableCMPCC:$cc)), - (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>; + imm:$cc)), + (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute imm:$cc))>; } // ---------------------------------------------------------------- diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 1831986e..4fa49e6 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2270,7 +2270,7 @@ unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { } } -/// Get the VPCMP immediate if the opcodes are swapped. +/// Get the VPCMP immediate if the operands are swapped. unsigned X86::getSwappedVPCMPImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2288,7 +2288,7 @@ unsigned X86::getSwappedVPCMPImm(unsigned Imm) { return Imm; } -/// Get the VPCOM immediate if the opcodes are swapped. +/// Get the VPCOM immediate if the operands are swapped. unsigned X86::getSwappedVPCOMImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2306,6 +2306,23 @@ unsigned X86::getSwappedVPCOMImm(unsigned Imm) { return Imm; } +/// Get the VCMP immediate if the operands are swapped. +unsigned X86::getSwappedVCMPImm(unsigned Imm) { + // Only need the lower 2 bits to distinguish. + switch (Imm & 0x3) { + default: llvm_unreachable("Unreachable!"); + case 0x00: case 0x03: + // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted. + break; + case 0x01: case 0x02: + // Need to toggle bits 3:0. Bit 4 stays the same.
+ Imm ^= 0xf; + break; + } + + return Imm; +} + bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { if (!MI.isTerminator()) return false; diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 3dd516f..8ff2fc4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -67,6 +67,9 @@ unsigned getSwappedVPCMPImm(unsigned Imm); /// Get the VPCOM immediate if the opcodes are swapped. unsigned getSwappedVPCOMImm(unsigned Imm); +/// Get the VCMP immediate if the operands are swapped. +unsigned getSwappedVCMPImm(unsigned Imm); + } // namespace X86 /// isGlobalStubReference - Return true if the specified TargetFlag operand is diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index f9d754a..db878ff 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -1968,8 +1968,7 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { define void @ktest_1(<8 x double> %in, double * %base) { ; KNL-LABEL: ktest_1: ; KNL: ## %bb.0: -; KNL-NEXT: vmovupd (%rdi), %zmm1 -; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; KNL-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; KNL-NEXT: kmovw %k0, %eax @@ -1986,8 +1985,7 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; ; SKX-LABEL: ktest_1: ; SKX: ## %bb.0: -; SKX-NEXT: vmovupd (%rdi), %zmm1 -; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; SKX-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; SKX-NEXT: kortestb %k0, %k0 @@ -2003,8 +2001,7 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; ; AVX512BW-LABEL: ktest_1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vmovupd (%rdi), %zmm1 -; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512BW-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1
{%k1} {z} ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; AVX512BW-NEXT: kmovd %k0, %eax @@ -2021,8 +2018,7 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; ; AVX512DQ-LABEL: ktest_1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vmovupd (%rdi), %zmm1 -; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512DQ-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; AVX512DQ-NEXT: kortestb %k0, %k0 @@ -2039,8 +2035,7 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; X86-LABEL: ktest_1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: vmovupd (%eax), %zmm1 -; X86-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; X86-NEXT: vcmpgtpd (%eax), %zmm0, %k1 ; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z} ; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; X86-NEXT: kortestb %k0, %k0 @@ -2084,10 +2079,8 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; ; KNL-LABEL: ktest_2: ; KNL: ## %bb.0: -; KNL-NEXT: vmovups (%rdi), %zmm2 -; KNL-NEXT: vmovups 64(%rdi), %zmm3 -; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 -; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1 +; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k2 ; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} ; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} ; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0 @@ -2112,10 +2105,8 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; ; SKX-LABEL: ktest_2: ; SKX: ## %bb.0: -; SKX-NEXT: vmovups (%rdi), %zmm2 -; SKX-NEXT: vmovups 64(%rdi), %zmm3 -; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 +; SKX-NEXT: vcmpgtps (%rdi), %zmm0, %k1 +; SKX-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2 ; SKX-NEXT: kunpckwd %k1, %k2, %k0 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} @@ -2137,10 +2128,8 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; ; AVX512BW-LABEL: ktest_2: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: 
vmovups (%rdi), %zmm2 -; AVX512BW-NEXT: vmovups 64(%rdi), %zmm3 -; AVX512BW-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; AVX512BW-NEXT: vcmpltps %zmm1, %zmm3, %k2 +; AVX512BW-NEXT: vcmpgtps (%rdi), %zmm0, %k1 +; AVX512BW-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0 ; AVX512BW-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} ; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} @@ -2162,10 +2151,8 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; ; AVX512DQ-LABEL: ktest_2: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vmovups (%rdi), %zmm2 -; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3 -; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1 -; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k1 +; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k2 ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} ; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0 @@ -2191,10 +2178,8 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; X86-LABEL: ktest_2: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: vmovups (%eax), %zmm2 -; X86-NEXT: vmovups 64(%eax), %zmm3 -; X86-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; X86-NEXT: vcmpltps %zmm1, %zmm3, %k2 +; X86-NEXT: vcmpgtps (%eax), %zmm0, %k1 +; X86-NEXT: vcmpgtps 64(%eax), %zmm1, %k2 ; X86-NEXT: kunpckwd %k1, %k2, %k0 ; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z} ; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z} diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll index cc9ac9e..b63a713 100644 --- a/llvm/test/CodeGen/X86/select-of-fp-constants.ll +++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll @@ -61,7 +61,7 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone { ; X32_AVX512F-LABEL: fcmp_select_fp_constants: ; X32_AVX512F: # %bb.0: ; X32_AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32_AVX512F-NEXT: vcmpneqss {{[0-9]+}}(%esp), %xmm0, %k0 +; 
X32_AVX512F-NEXT: vcmpneqss {{\.LCPI.*}}, %xmm0, %k0 ; X32_AVX512F-NEXT: kmovw %k0, %eax ; X32_AVX512F-NEXT: flds {{\.LCPI.*}}(,%eax,4) ; X32_AVX512F-NEXT: retl -- 2.7.4