From 9ae797a7984aad0e0b09f2b030e7786cf395ab58 Mon Sep 17 00:00:00 2001
From: Guy Blank
Date: Sun, 21 Aug 2016 08:02:27 +0000
Subject: [PATCH] [AVX512][FastISel] Do not use K registers in TEST instructions

In some cases, FastISel was emitting a TEST instruction with a K register
input, which is illegal. Changed to use KORTEST when dealing with K registers.

Differential Revision: https://reviews.llvm.org/D23163

llvm-svn: 279393
---
 llvm/lib/Target/X86/X86FastISel.cpp            | 37 +++++++++--
 llvm/test/CodeGen/X86/fast-isel-select-cmov.ll | 88 ++++++++++++++++++--------
 2 files changed, 94 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index f30331f..50f80cb 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1654,6 +1654,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
       if (TestOpc) {
         unsigned OpReg = getRegForValue(TI->getOperand(0));
         if (OpReg == 0) return false;
+
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
           .addReg(OpReg).addImm(1);
 
@@ -1691,8 +1692,15 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
 
   unsigned OpReg = getRegForValue(BI->getCondition());
   if (OpReg == 0) return false;
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
-    .addReg(OpReg).addImm(1);
+  // In case OpReg is a K register, kortest against itself.
+  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::KORTESTWrr))
+        .addReg(OpReg)
+        .addReg(OpReg);
+  else
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+        .addReg(OpReg)
+        .addImm(1);
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
     .addMBB(TrueMBB);
   finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
@@ -2026,8 +2034,16 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
       return false;
     bool CondIsKill = hasTrivialKill(Cond);
 
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
-      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+    // In case OpReg is a K register, kortest against itself.
+    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(X86::KORTESTWrr))
+          .addReg(CondReg, getKillRegState(CondIsKill))
+          .addReg(CondReg);
+    else
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+          .addReg(CondReg, getKillRegState(CondIsKill))
+          .addImm(1);
   }
 
   const Value *LHS = I->getOperand(1);
@@ -2198,8 +2214,17 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
     if (CondReg == 0)
      return false;
     bool CondIsKill = hasTrivialKill(Cond);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
-      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+
+    // In case OpReg is a K register, kortest against itself.
+    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(X86::KORTESTWrr))
+          .addReg(CondReg, getKillRegState(CondIsKill))
+          .addReg(CondReg);
+    else
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+          .addReg(CondReg, getKillRegState(CondIsKill))
+          .addImm(1);
   }
 
   const Value *LHS = I->getOperand(1);
diff --git a/llvm/test/CodeGen/X86/fast-isel-select-cmov.ll b/llvm/test/CodeGen/X86/fast-isel-select-cmov.ll
index 879cd2f..290bcaa 100644
--- a/llvm/test/CodeGen/X86/fast-isel-select-cmov.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-select-cmov.ll
@@ -1,60 +1,98 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK --check-prefix=NOAVX512
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 ; Test conditional move for the supported types (i16, i32, and i32) and
 ; conditon input (argument or cmp). Currently i8 is not supported.
 
 define zeroext i16 @select_cmov_i16(i1 zeroext %cond, i16 zeroext %a, i16 zeroext %b) {
-; CHECK-LABEL: select_cmov_i16
-; CHECK: testb $1, %dil
-; CHECK-NEXT: cmovew %dx, %si
-; CHECK-NEXT: movzwl %si, %eax
+; NOAVX512-LABEL: select_cmov_i16:
+; NOAVX512: ## BB#0:
+; NOAVX512-NEXT: testb $1, %dil
+; NOAVX512-NEXT: cmovew %dx, %si
+; NOAVX512-NEXT: movzwl %si, %eax
+; NOAVX512-NEXT: retq
+;
+; AVX512-LABEL: select_cmov_i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: kmovw %edi, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: cmovew %dx, %si
+; AVX512-NEXT: movzwl %si, %eax
+; AVX512-NEXT: retq
   %1 = select i1 %cond, i16 %a, i16 %b
   ret i16 %1
 }
 
 define zeroext i16 @select_cmp_cmov_i16(i16 zeroext %a, i16 zeroext %b) {
-; CHECK-LABEL: select_cmp_cmov_i16
-; CHECK: cmpw %si, %di
-; CHECK-NEXT: cmovbw %di, %si
-; CHECK-NEXT: movzwl %si, %eax
+; CHECK-LABEL: select_cmp_cmov_i16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: cmpw %si, %di
+; CHECK-NEXT: cmovbw %di, %si
+; CHECK-NEXT: movzwl %si, %eax
+; CHECK-NEXT: retq
   %1 = icmp ult i16 %a, %b
   %2 = select i1 %1, i16 %a, i16 %b
   ret i16 %2
 }
 
 define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) {
-; CHECK-LABEL: select_cmov_i32
-; CHECK: testb $1, %dil
-; CHECK-NEXT: cmovel %edx, %esi
-; CHECK-NEXT: movl %esi, %eax
+; NOAVX512-LABEL: select_cmov_i32:
+; NOAVX512: ## BB#0:
+; NOAVX512-NEXT: testb $1, %dil
+; NOAVX512-NEXT: cmovel %edx, %esi
+; NOAVX512-NEXT: movl %esi, %eax
+; NOAVX512-NEXT: retq
+;
+; AVX512-LABEL: select_cmov_i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: kmovw %edi, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: cmovel %edx, %esi
+; AVX512-NEXT: movl %esi, %eax
+; AVX512-NEXT: retq
   %1 = select i1 %cond, i32 %a, i32 %b
   ret i32 %1
 }
 
 define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: select_cmp_cmov_i32
-; CHECK: cmpl %esi, %edi
-; CHECK-NEXT: cmovbl %edi, %esi
-; CHECK-NEXT: movl %esi, %eax
+; CHECK-LABEL: select_cmp_cmov_i32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: cmovbl %edi, %esi
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: retq
   %1 = icmp ult i32 %a, %b
   %2 = select i1 %1, i32 %a, i32 %b
   ret i32 %2
 }
 
 define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) {
-; CHECK-LABEL: select_cmov_i64
-; CHECK: testb $1, %dil
-; CHECK-NEXT: cmoveq %rdx, %rsi
-; CHECK-NEXT: movq %rsi, %rax
+; NOAVX512-LABEL: select_cmov_i64:
+; NOAVX512: ## BB#0:
+; NOAVX512-NEXT: testb $1, %dil
+; NOAVX512-NEXT: cmoveq %rdx, %rsi
+; NOAVX512-NEXT: movq %rsi, %rax
+; NOAVX512-NEXT: retq
+;
+; AVX512-LABEL: select_cmov_i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: kmovw %edi, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: cmoveq %rdx, %rsi
+; AVX512-NEXT: movq %rsi, %rax
+; AVX512-NEXT: retq
   %1 = select i1 %cond, i64 %a, i64 %b
   ret i64 %1
 }
 
 define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: select_cmp_cmov_i64
-; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: cmovbq %rdi, %rsi
-; CHECK-NEXT: movq %rsi, %rax
+; CHECK-LABEL: select_cmp_cmov_i64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: cmpq %rsi, %rdi
+; CHECK-NEXT: cmovbq %rdi, %rsi
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: retq
   %1 = icmp ult i64 %a, %b
   %2 = select i1 %1, i64 %a, i64 %b
   ret i64 %2
 }
-- 
2.7.4
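
The KORTEST-vs-TEST choice above is emitted in three places (X86SelectBranch,
X86FastEmitCMoveSelect and X86FastEmitPseudoSelect). Below is a minimal sketch
of how the duplicated logic could be folded into one helper. It is not part of
the patch: the helper name emitCondTest is hypothetical, and it assumes the
code lives inside X86FastISel so that the usual members (MRI, TII, FuncInfo,
DbgLoc) and getKillRegState are in scope.

  // Hypothetical helper (sketch only): emit the flags-setting test for a
  // boolean condition register held either in a GPR or in an AVX-512 mask
  // (K) register.
  void emitCondTest(unsigned CondReg, bool CondIsKill) {
    // TEST cannot take a K register. When the condition lives in a mask
    // register, KORTEST it against itself instead; ZF is then set exactly
    // when the mask is all zeroes, matching what testb $1 does for a GPR
    // condition.
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(X86::KORTESTWrr))
          .addReg(CondReg, getKillRegState(CondIsKill))
          .addReg(CondReg);
    else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
          .addReg(CondReg, getKillRegState(CondIsKill))
          .addImm(1);
  }

Each of the three sites would then reduce to a single emitCondTest call,
passing false for the kill flag in the branch case, which does not track it.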