From 6a4acb9d809aaadb9304a7a2f3382d958a6c2adf Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 30 Jan 2020 15:39:51 -0800 Subject: [PATCH] Revert "AMDGPU: Cleanup and fix SMRD offset handling" This reverts commit 17dbc6611df9044d779d85b3d545bd37e5dd5200. A test is failing on some bots --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 33 ++-- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 14 +- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 2 +- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 37 +--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 16 +- .../GlobalISel/inst-select-load-constant.mir | 10 +- llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll | 11 +- .../CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll | 220 +-------------------- 9 files changed, 39 insertions(+), 306 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 62fc6f5..d887ed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1771,31 +1771,26 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDLoc SL(ByteOffsetNode); GCNSubtarget::Generation Gen = Subtarget->getGeneration(); - uint64_t ByteOffset = C->getZExtValue(); - Optional EncodedOffset = - AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); - if (EncodedOffset) { - Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + int64_t ByteOffset = C->getSExtValue(); + int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); + + if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) { + Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); Imm = true; return true; } - if (Gen == AMDGPUSubtarget::SEA_ISLANDS) { - EncodedOffset = - AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); - if (EncodedOffset) { - Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); - return true; - } - } - - if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) + if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) return false; - SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); - Offset = SDValue( - CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); - + if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { + // 32-bit Immediates are supported on Sea Islands. + Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); + } else { + SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); + Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, + C32Bit), 0); + } Imm = false; return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 55ed81d..90136f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2107,14 +2107,15 @@ AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { return None; const GEPInfo &GEPInfo = AddrInfo[0]; - Optional EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); - if (!EncodedImm) + + if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm)) return None; unsigned PtrReg = GEPInfo.SgprParts[0]; + int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } + [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } }}; } @@ -2128,14 +2129,13 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { const GEPInfo &GEPInfo = AddrInfo[0]; unsigned PtrReg = GEPInfo.SgprParts[0]; - Optional EncodedImm = - AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm); - if (!EncodedImm) + int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); + if (!isUInt<32>(EncodedImm)) return None; return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } + [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } }}; } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index d0c713d..8364665 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -587,7 +587,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, 16, 4); unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; const GCNSubtarget &Subtarget = MF.getSubtarget(); - unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); + unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset); BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) .addReg(Rsrc01) .addImm(EncodedOffset) // offset diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 806f8af..ee20758 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -501,7 +501,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI, : 4; break; case S_BUFFER_LOAD_IMM: - EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4); + EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4); break; default: EltSize = 4; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 14f9586..8b21b93 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1247,43 +1247,16 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { return isGCN3Encoding(ST) || isGFX10(ST); } -static bool isLegalSMRDEncodedImmOffset(const MCSubtargetInfo &ST, - int64_t EncodedOffset) { - return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) - : isUInt<8>(EncodedOffset); -} - -static bool isDwordAligned(uint64_t ByteOffset) { - return (ByteOffset & 3) == 0; -} - -uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, - uint64_t ByteOffset) { +int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { if (hasSMEMByteOffset(ST)) return ByteOffset; - - assert(isDwordAligned(ByteOffset)); return ByteOffset >> 2; } -Optional getSMRDEncodedOffset(const MCSubtargetInfo &ST, - int64_t ByteOffset) { - if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) - return None; - - int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); - return isLegalSMRDEncodedImmOffset(ST, EncodedOffset) ? - Optional(EncodedOffset) : None; -} - -Optional getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, - int64_t ByteOffset) { - if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) - return None; - - assert(isCI(ST)); - int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); - return isUInt<32>(EncodedOffset) ? Optional(EncodedOffset) : None; +bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { + int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); + return (hasSMEMByteOffset(ST)) ? + isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } // Given Imm, split it into the values to put into the SOffset and ImmOffset diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 859f832..a5bada2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -648,19 +648,9 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); bool isArgPassedInSGPR(const Argument *Arg); -/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate -/// offsets. -uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); - -/// \returns The encoding that will be used for \p ByteOffset in the SMRD offset -/// field, or None if it won't fit. This is useful on all subtargets. -Optional getSMRDEncodedOffset(const MCSubtargetInfo &ST, - int64_t ByteOffset); - -/// \return The encoding that can be used for a 32-bit literal offset in an SMRD -/// instruction. This is only useful on CI.s -Optional getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, - int64_t ByteOffset); +/// \returns The encoding that will be used for \p ByteOffset in the SMRD +/// offset field. +int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); /// \returns true if this offset is small enough to fit in the SMRD /// offset field. \p ByteOffset should be the offset in bytes and diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir index 74f18b7..720b0de 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -788,9 +788,8 @@ body: | ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4) - ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262143, 0, 0 :: (load 4, addrspace 4) + ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 @@ -873,9 +872,8 @@ body: | ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4) - ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 268435455, 0, 0 :: (load 4, addrspace 4) + ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index 1a3cc72..1cdf1d3 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -368,16 +368,9 @@ done: ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: ; GCN: s_and_saveexec_b64 -; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} -; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} - -; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} -; VI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} -; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} - -; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffffff{{$}} - ; GCN: s_or_b64 exec, exec define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll index f35bb54..4c25ebb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll @@ -1,11 +1,9 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI,SICI -; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,CI,SICI -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI +;RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI ;GCN-LABEL: {{^}}s_buffer_load_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1 -;CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1 ;VI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x4 define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) { main_body: @@ -40,7 +38,6 @@ main_body: ;GCN-LABEL: {{^}}s_buffer_loadx2_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 -;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 ;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40 define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) { main_body: @@ -81,7 +78,6 @@ main_body: ;GCN-LABEL: {{^}}s_buffer_loadx3_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 -;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 ;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40 define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) { main_body: @@ -111,7 +107,6 @@ main_body: ;GCN-LABEL: {{^}}s_buffer_loadx3_index_divergent: ;GCN-NOT: s_waitcnt; ;SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen -;CI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ;VI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) { main_body: @@ -127,7 +122,6 @@ main_body: ;GCN-LABEL: {{^}}s_buffer_loadx4_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32 -;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32 ;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0xc8 define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) { main_body: @@ -174,7 +168,6 @@ main_body: ;GCN-LABEL: {{^}}s_buffer_load_imm_mergex2: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1 -;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1 ;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x4 define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) { main_body: @@ -189,7 +182,6 @@ main_body: ;GCN-LABEL: {{^}}s_buffer_load_imm_mergex4: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2 -;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2 ;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x8 define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) { main_body: @@ -244,214 +236,6 @@ bb1: ; preds = %main_body ret void } -; GCN-LABEL: {{^}}s_buffer_load_imm_neg1: -; GCN: s_mov_b32 [[K:s[0-9]+]], -1{{$}} -; GCN: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_neg4: -; SI: s_mov_b32 [[K:s[0-9]+]], -4{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x3fffffff{{$}} - -; VI: s_mov_b32 [[K:s[0-9]+]], -4{{$}} -; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_neg8: -; SI: s_mov_b32 [[K:s[0-9]+]], -8{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x3ffffffe{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_bit31: -; SI: s_brev_b32 [[K:s[0-9]+]], 1{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x20000000{{$}} - -; VI: s_brev_b32 [[K:s[0-9]+]], 1{{$}} -; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_bit30: -; SI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x10000000{{$}} - -; VI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}} -; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_bit29: -; SI: s_brev_b32 [[K:s[0-9]+]], 4{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x8000000{{$}} - -; VI: s_brev_b32 [[K:s[0-9]+]], 4{{$}} -; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_bit21: -; SI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}} - -; VI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}} -; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_bit20: -; SI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x40000{{$}} - -; VI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}} -; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit20: -; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x3ffc0000{{$}} - -; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}} -; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_bit19: -; SI: s_mov_b32 [[K:s[0-9]+]], 0x80000{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI s_buffer_load_dword s0, s[0:3], 0x20000{{$}} - -; VI s_buffer_load_dword s0, s[0:3], 0x20000{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit19: -; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI s_buffer_load_dword s0, s[0:3], 0x20000{{$}} - -; VI s_buffer_load_dword s0, s[0:3], 0x20000{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_255: -; SICI: s_movk_i32 [[K:s[0-9]+]], 0xff{{$}} -; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; VI: s_buffer_load_dword s0, s[0:3], 0xff{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_256: -; SICI: s_buffer_load_dword s0, s[0:3], 0x40{{$}} -; VI: s_buffer_load_dword s0, s[0:3], 0x100{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_1016: -; SICI: s_buffer_load_dword s0, s[0:3], 0xfe{{$}} -; VI: s_buffer_load_dword s0, s[0:3], 0x3f8{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_1020: -; SICI: s_buffer_load_dword s0, s[0:3], 0xff{{$}} -; VI: s_buffer_load_dword s0, s[0:3], 0x3fc{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_1021: -; SICI: s_movk_i32 [[K:s[0-9]+]], 0x3fd{{$}} -; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_1024: -; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}} - -; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_1025: -; SICI: s_movk_i32 [[K:s[0-9]+]], 0x401{{$}} -; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; VI: s_buffer_load_dword s0, s[0:3], 0x401{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0) - ret i32 %load -} - -; GCN-LABEL: {{^}}s_buffer_load_imm_1028: -; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}} -; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} - -; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}} -; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}} -define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) { - %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0) - ret i32 %load -} - declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32) -- 2.7.4