From 156a1b59dfacacacb2c5ac81ee2db4b8e9d46279 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Feb 2020 15:43:18 -0500 Subject: [PATCH] AMDGPU: Make signext/zeroext behave more sensibly over > i32 Interpret these as extending to the next multiple of 32 bits. Previously the attributes had no effect with i48, for example, which is really split into {i32, i16}; the high part should be extended. --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 11 ++++++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 ++ llvm/test/CodeGen/AMDGPU/function-returns.ll | 48 +++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 79a0d20..625db96 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -617,6 +617,17 @@ bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N, return true; } +EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, + ISD::NodeType ExtendKind) const { + assert(!VT.isVector() && "only scalar expected"); + + // Round to the next multiple of 32-bits. 
+ unsigned Size = VT.getSizeInBits(); + if (Size <= 32) + return MVT::i32; + return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32)); +} + MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { return MVT::i32; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index ffe5113..a8d5f55 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -178,6 +178,9 @@ public: bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, + ISD::NodeType ExtendKind) const override; + MVT getVectorIdxTy(const DataLayout &) const override; bool isSelectSupported(SelectSupportKind) const override; diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll index f40729d..49a6938 100644 --- a/llvm/test/CodeGen/AMDGPU/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -104,6 +104,54 @@ define i48 @i48_func_void() #0 { ret i48 %val } +; GCN-LABEL: {{^}}i48_zeroext_func_void: +; GCN: buffer_load_dword v0, off +; GCN-NEXT: buffer_load_ushort v1, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define zeroext i48 @i48_zeroext_func_void() #0 { + %val = load i48, i48 addrspace(1)* undef, align 8 + ret i48 %val +} + +; GCN-LABEL: {{^}}i48_signext_func_void: +; GCN: buffer_load_dword v0, off +; GCN-NEXT: buffer_load_sshort v1, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define signext i48 @i48_signext_func_void() #0 { + %val = load i48, i48 addrspace(1)* undef, align 8 + ret i48 %val +} + +; GCN-LABEL: {{^}}i63_func_void: +; GCN: s_waitcnt +; GCN-NEXT: s_setpc_b64 +define i63 @i63_func_void(i63 %val) #0 { + ret i63 %val +} + +; GCN-LABEL: {{^}}i63_zeroext_func_void: +; GCN: s_waitcnt +; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 +; GCN-NEXT: s_setpc_b64 +define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 { + ret 
i63 %val +} + +; GCN-LABEL: {{^}}i63_signext_func_void: +; GCN: s_waitcnt +; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 +; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1 + +; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1] + +; GCN-NEXT: s_setpc_b64 +define signext i63 @i63_signext_func_void(i63 %val) #0 { + ret i63 %val +} + ; GCN-LABEL: {{^}}i64_func_void: ; GCN: buffer_load_dwordx2 v[0:1], off ; GCN-NEXT: s_waitcnt vmcnt(0) -- 2.7.4