From fd3300123de4fdc77cce94520d2cb58ea4c9122e Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Thu, 12 Jan 2023 03:52:33 -0500 Subject: [PATCH] [CodeGen] Prevent overlapping subregs in getCoveringSubRegIndexes If `getCoveringSubRegIndexes` returns a set of subregister indexes where some subregisters overlap others, it can create unsatisfiable copy bundles that eventually cause VirtRegRewriter to error out due to "cycles in copy bundle". We can simply prevent this by making the algorithm skip over subregisters indexes that would cause an overlap with already-covered lanes. Note that in the case of AMDGPU, this problem is caused by the lack of subregisters indexes for 13/14/15-register tuples. We have everything up until 12, then we have 16 and 32 but nothing between 12 and 16. This means that the best candidate to do the least amount of copies when splitting a 29-register tuple was to copy (e.g.) 0-15 and 14-29, causing an overlap. With this change, getCoveringSubRegIndexes will now prefer using something like 0-15, 16-28 and 1 Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D141576 --- llvm/lib/CodeGen/TargetRegisterInfo.cpp | 12 ++++++++---- .../CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir | 3 ++- .../CodeGen/AMDGPU/split-liverange-overlapping-copies.mir | 6 ++++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 38a3e68..a41d599 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -571,10 +571,14 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes( break; } - // Try to cover as much of the remaining lanes as possible but - // as few of the already covered lanes as possible. - int Cover = (SubRegMask & LanesLeft).getNumLanes() - - (SubRegMask & ~LanesLeft).getNumLanes(); + // Do not cover already-covered lanes to avoid creating cycles + // in copy bundles (= bundle contains copies that write to the + // registers). + if ((SubRegMask & ~LanesLeft).any()) + continue; + + // Try to cover as many of the remaining lanes as possible. + const int Cover = (SubRegMask & LanesLeft).getNumLanes(); if (Cover > BestCover) { BestCover = Cover; BestIdx = Idx; diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir index 31e1947..41b85ce 100644 --- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir +++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir @@ -36,7 +36,8 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { - ; CHECK-NEXT: internal %6.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 + ; CHECK-NEXT: internal %6.sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub29_sub30_sub31 ; CHECK-NEXT: } ; CHECK-NEXT: %6.sub0:av_1024_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit %6.sub0 diff --git a/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir index 4cb2b6f..f4cf0f4 100644 --- a/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir @@ -41,7 +41,8 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { - ; CHECK-NEXT: internal %6.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 + ; CHECK-NEXT: internal %6.sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub29_sub30_sub31 ; CHECK-NEXT: } ; CHECK-NEXT: %6.sub0:av_1024_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit %6.sub0 @@ -116,7 +117,8 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { - ; CHECK-NEXT: internal %6.sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30:av_1024 = COPY [[COPY]].sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30 + ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 + ; CHECK-NEXT: internal %6.sub29_sub30:av_1024 = COPY [[COPY]].sub29_sub30 ; CHECK-NEXT: } ; CHECK-NEXT: %6.sub0:av_1024 = IMPLICIT_DEF ; CHECK-NEXT: %6.sub31:av_1024 = IMPLICIT_DEF -- 2.7.4