From: Craig Topper Date: Fri, 9 Sep 2016 01:36:10 +0000 (+0000) Subject: [AVX-512] Add VPCMP instructions to the load folding tables and make them commutable. X-Git-Tag: llvmorg-4.0.0-rc1~10258 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=149e6bdc164ece3c664ee002ee1416c488559f2d;p=platform%2Fupstream%2Fllvm.git [AVX-512] Add VPCMP instructions to the load folding tables and make them commutable. llvm-svn: 281013 --- diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 885f135..31ed86f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1647,6 +1647,7 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { + let isCommutable = 1 in def rri : AVX512AIi8 NLE + case 0x02: Imm = 0x05; break; // LE -> NLT + case 0x05: Imm = 0x02; break; // NLT -> LE + case 0x06: Imm = 0x01; break; // NLE -> LT + case 0x00: // EQ + case 0x03: // FALSE + case 0x04: // NE + case 0x07: // TRUE + break; + } + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.getOperand(3).setImm(Imm); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } case X86::VPCOMBri: case X86::VPCOMUBri: case X86::VPCOMDri: case X86::VPCOMUDri: case X86::VPCOMQri: case X86::VPCOMUQri: @@ -3491,6 +3546,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, // Flip comparison mode immediate (if necessary). 
     unsigned Imm = MI.getOperand(3).getImm() & 0x7;
     switch (Imm) {
+    default: llvm_unreachable("Unreachable!");
     case 0x00: Imm = 0x02; break; // LT -> GT
     case 0x01: Imm = 0x03; break; // LE -> GE
     case 0x02: Imm = 0x00; break; // GT -> LT
@@ -3499,7 +3555,6 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     case 0x05: // NE
     case 0x06: // FALSE
     case 0x07: // TRUE
-    default:
       break;
     }
     auto &WorkingMI = cloneIfNew(MI);
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
index db61196..e0acf2b 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -132,8 +132,7 @@ define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun
 define <8 x i32> @test256_8b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
 ; CHECK-LABEL: test256_8b:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm2
-; CHECK-NEXT: vpcmpnltud %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
   %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
@@ -277,8 +276,7 @@ define <8 x i32> @test256_17(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
 define <8 x i32> @test256_18(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
 ; CHECK-LABEL: test256_18:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm2
-; CHECK-NEXT: vpcmpneqd %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1
 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
   %y = load <8 x i32>, <8 x i32>* %yp, align 4
@@ -302,8 +300,7 @@ define <8 x i32> @test256_19(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
 define <8 x i32> @test256_20(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
 ; CHECK-LABEL: test256_20:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm2
-; CHECK-NEXT: vpcmpnltud %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
   %y = load <8 x i32>, <8 x i32>* %yp, align 4
@@ -443,8 +440,7 @@ define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun
 define <4 x i32> @test128_8b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
 ; CHECK-LABEL: test128_8b:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm2
-; CHECK-NEXT: vpcmpnltud %xmm0, %xmm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
   %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
@@ -588,8 +584,7 @@ define <4 x i32> @test128_17(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 define <4 x i32> @test128_18(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
 ; CHECK-LABEL: test128_18:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm2
-; CHECK-NEXT: vpcmpneqd %xmm0, %xmm2, %k1
+; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1
 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
   %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
@@ -613,8 +608,7 @@ define <4 x i32> @test128_19(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 define <4 x i32> @test128_20(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
 ; CHECK-LABEL: test128_20:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm2
-; CHECK-NEXT: vpcmpnltud %xmm0, %xmm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
   %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4