From faccf427dfd137b857df916b358c0c888118918e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 6 Jun 2021 12:44:24 -0400 Subject: [PATCH] AMDGPU/GlobalISel: Remove special case lowering for non-pow-2 stores We end up with extra copies from buildAnyExtOrTrunc if these are lowered after the register types are legalized. --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1 - .../AMDGPU/GlobalISel/legalize-store-global.mir | 60 +++++++++++++--------- .../CodeGen/AMDGPU/GlobalISel/legalize-store.mir | 10 ++-- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index c1a9b30..a357c86 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1266,7 +1266,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // May need relegalization for the scalars. return std::make_pair(0, EltTy); }) - .lowerIfMemSizeNotPow2() .minScalar(0, S32) .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32)) .widenScalarToNextPow2(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 11d8d28..5647db7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -247,41 +247,45 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; CI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; GFX9: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -316,11 +320,12 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; CI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -343,11 +348,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; GFX9: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -391,11 +397,12 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; CI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -425,11 +432,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; GFX9: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -818,11 +826,12 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CI: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) @@ -866,11 +875,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) @@ -909,11 +919,12 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CI: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -940,11 +951,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 258578f..bc1f793 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -823,10 +823,11 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C3]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C3]](s32) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -853,10 +854,11 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 -- 2.7.4