From 2327513b853f030ff399413a651974ab23de4e1b Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Sat, 20 Mar 2021 12:55:46 +0800 Subject: [PATCH] [X86] Fix a bug when calculating the ldtilecfg insertion points. The BB in which we initialize the ldtilecfg is special. We don't need to check if its predecessor BBs need to insert ldtilecfg for calls. We reuse the flag HasCallBeforeAMX so that the predecessors won't be added to CfgNeedInsert. This case happens only when the entry BB is in a loop. We need to hoist the first tile config point out of the loop in the future. Reviewed By: LuoYuanke Differential Revision: https://reviews.llvm.org/D98845 --- llvm/lib/Target/X86/X86PreTileConfig.cpp | 6 ++++++ llvm/test/CodeGen/X86/AMX/amx-across-func.ll | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp index dd35a5d..cd5d3d6 100644 --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -296,6 +296,12 @@ static void reloadTileConfig(MachineInstr *MI, int FI, MachineBasicBlock *MBB = MI->getParent(); BBVisitedInfo[MBB] = BBInfo(CfgNeedInsert, MBB, MI); + // The entry BB is special, since it always has a ldtilecfg before AMX + // instruction. We don't need to check if its predecessor BBs have calls. + // FIXME: This case happens only when the entry BB is in a loop. We need to + // hoist the first tile config point out of the loop in the future.
+ BBVisitedInfo[MBB].HasCallBeforeAMX = true; + WorkList.push_back(MBB); while (!WorkList.empty()) { MBB = WorkList.pop_back_val(); diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll index 2bb73e2..d8d18a7 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll @@ -280,15 +280,14 @@ define dso_local void @test_loop2(i32 %0) nounwind { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: callq foo -; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: testl %ebx, %ebx ; CHECK-NEXT: jle .LBB3_3 ; CHECK-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 ; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; CHECK-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: tileloadd (%r14,%r15), %tmm0 ; CHECK-NEXT: movabsq $64, %rax -- 2.7.4