From: Matt Arsenault
Date: Sun, 26 Jul 2020 00:16:15 +0000 (-0400)
Subject: AMDGPU: Don't assume call targets are registers
X-Git-Tag: llvmorg-13-init~16498
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=068808d102846d551e683241d0b3b9607b3bdf8d;p=platform%2Fupstream%2Fllvm.git

AMDGPU: Don't assume call targets are registers

GlobalISel let through a call to null, which would then fold into the source operand like any other inline immediate. The SelectionDAG lowering deletes calls to null and undef as a workaround from before calls were supported. We should probably drop the special-case handling in the DAG lowering now, since the middle end optimizers delete null calls anyway.

---
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 2a157eb..5ab6edf 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -963,26 +963,28 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( int CallAddrOpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); - RegInterval CallAddrOpInterval = + + if (MI.getOperand(CallAddrOpIdx).isReg()) { + RegInterval CallAddrOpInterval = ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, CallAddrOpIdx); - for (int RegNo = CallAddrOpInterval.first; - RegNo < CallAddrOpInterval.second; ++RegNo) - ScoreBrackets.determineWait( + for (int RegNo = CallAddrOpInterval.first; + RegNo < CallAddrOpInterval.second; ++RegNo) + ScoreBrackets.determineWait( LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); - int RtnAddrOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); - if (RtnAddrOpIdx != -1) { - RegInterval RtnAddrOpInterval = + int RtnAddrOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + if (RtnAddrOpIdx != -1) { + RegInterval RtnAddrOpInterval = ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, RtnAddrOpIdx); - for (int RegNo = RtnAddrOpInterval.first; - RegNo < 
RtnAddrOpInterval.second; ++RegNo) - ScoreBrackets.determineWait( + for (int RegNo = RtnAddrOpInterval.first; + RegNo < RtnAddrOpInterval.second; ++RegNo) + ScoreBrackets.determineWait( LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); + } } - } else { // FIXME: Should not be relying on memoperands. // Look at the source operands of every instruction to see if diff --git a/llvm/test/CodeGen/AMDGPU/call-constant.ll b/llvm/test/CodeGen/AMDGPU/call-constant.ll index 11f4b3c..9a52ccf 100644 --- a/llvm/test/CodeGen/AMDGPU/call-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/call-constant.ll @@ -1,11 +1,12 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s +; RUN: llc -global-isel=1 -amdgpu-fixed-function-abi=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s ; FIXME: Emitting unnecessary flat_scratch setup ; GCN-LABEL: {{^}}test_call_undef: -; GCN: s_mov_b32 flat_scratch_lo, s5 -; GCN: s_add_u32 s4, s4, s7 -; GCN: s_lshr_b32 +; SDAG: s_mov_b32 flat_scratch_lo, s5 +; SDAG: s_add_u32 s4, s4, s7 +; SDAG: s_lshr_b32 ; GCN: s_endpgm define amdgpu_kernel void @test_call_undef() #0 { %val = call i32 undef(i32 1) @@ -15,17 +16,21 @@ define amdgpu_kernel void @test_call_undef() #0 { } ; GCN-LABEL: {{^}}test_tail_call_undef: -; GCN: s_waitcnt -; GCN-NEXT: .Lfunc_end +; SDAG: s_waitcnt +; SDAG-NEXT: .Lfunc_end + +; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} define i32 @test_tail_call_undef() #0 { %call = tail call i32 undef(i32 1) ret i32 %call } ; GCN-LABEL: {{^}}test_call_null: -; GCN: s_mov_b32 flat_scratch_lo, s5 -; GCN: s_add_u32 s4, s4, s7 -; GCN: s_lshr_b32 +; SDAG: s_mov_b32 flat_scratch_lo, s5 +; SDAG: s_add_u32 s4, s4, s7 +; SDAG: s_lshr_b32 + +; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN: s_endpgm define amdgpu_kernel void @test_call_null() #0 { 
%val = call i32 null(i32 1) @@ -35,8 +40,10 @@ define amdgpu_kernel void @test_call_null() #0 { } ; GCN-LABEL: {{^}}test_tail_call_null: -; GCN: s_waitcnt -; GCN-NEXT: .Lfunc_end +; SDAG: s_waitcnt +; SDAG-NEXT: .Lfunc_end + +; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define i32 @test_tail_call_null() #0 { %call = tail call i32 null(i32 1) ret i32 %call