From 37894ba6612bf24060f38001122946108a770bc8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 29 Jul 2020 22:34:11 -0400 Subject: [PATCH] AMDGPU/GlobalISel: Make s16 phi legal If we were to have an operation with an s16 def that needs to be executed in a waterfall loop, not having s16 legal would place an avoidable burden on RegBankSelect to widen it. --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 4 +-- .../CodeGen/AMDGPU/GlobalISel/legalize-phi.mir | 32 ++++++++++------------ 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 33992ca..e33523c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -443,13 +443,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more // elements for v3s16 getActionDefinitionsBuilder(G_PHI) - .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256}) + .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256}) .legalFor(AllS32Vectors) .legalFor(AllS64Vectors) .legalFor(AddrSpaces64) .legalFor(AddrSpaces32) .legalIf(isPointer(0)) - .clampScalar(0, S32, S256) + .clampScalar(0, S16, S256) .widenScalarToNextPow2(0, 32) .clampMaxNumElements(0, S32, 16) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 10bd179..50311d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -1295,19 +1295,19 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY2]](s32), %bb.0, [[COPY3]](s32), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PHI]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -1348,19 +1348,19 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY2]](s32), %bb.0, [[COPY3]](s32), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PHI]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -1401,19 +1401,17 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY2]](s32), %bb.0, [[DEF]](s32), %bb.1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PHI]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[DEF]](s16), %bb.1 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s16) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 -- 2.7.4