From 0b76fc4c772c03beb6d09eef7e48ffc77a214c82 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Sep 2016 21:34:26 +0000 Subject: [PATCH] AMDGPU/SI: Add support for triples with the mesa3d operating system Summary: mesa3d will use the same kernel calling convention as amdhsa, but it will handle everything else like the default 'unknown' OS type. Reviewers: arsenm Subscribers: arsenm, llvm-commits, kzhuravl Differential Revision: https://reviews.llvm.org/D22783 llvm-svn: 281779 --- .../AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 4 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 10 +- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 6 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 +- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 3 +- .../AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll | 16 +-- .../AMDGPU/llvm.amdgcn.workgroup.id.ll | 103 +++++++++--------- .../CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll | 12 +- 10 files changed, 89 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 7ee2dc71fce0..60afba656943 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -202,7 +202,7 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { // always initialized. bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); - if (TT.getOS() == Triple::AMDHSA) { + if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) { Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); for (Function &F : M) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 6be53b11d0e1..f2b9fc8477d9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -123,7 +123,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget(); SIProgramInfo KernelInfo; - if (STM.isAmdHsaOS()) { + if (STM.isAmdCodeObjectV2()) { getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } @@ -132,7 +132,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const AMDGPUSubtarget &STM = MF->getSubtarget(); - if (MFI->isKernel() && STM.isAmdHsaOS()) { + if (MFI->isKernel() && STM.isAmdCodeObjectV2()) { AMDGPUTargetStreamer *TS = static_cast(OutStreamer->getTargetStreamer()); TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index b1cadeb0459e..6b953eadeed5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -138,6 +138,10 @@ public: return TargetTriple.getOS() == Triple::AMDHSA; } + bool isMesa3DOS() const { + return TargetTriple.getOS() == Triple::Mesa3D; + } + Generation getGeneration() const { return Gen; } @@ -270,10 +274,14 @@ public: return EnableXNACK; } + bool isAmdCodeObjectV2() const { + return isAmdHsaOS() || isMesa3DOS(); + } + /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset() const { - return isAmdHsaOS() ? 0 : 36; + return isAmdCodeObjectV2() ? 0 : 36; } unsigned getAlignmentForImplicitArgPtr() const { diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index d184e1b9620d..0c0d9c9f077f 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -221,7 +221,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } @@ -240,7 +240,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, MRI.addLiveIn(PreloadedScratchWaveOffsetReg); MBB.addLiveIn(PreloadedScratchWaveOffsetReg); - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { MRI.addLiveIn(PreloadedPrivateBufferReg); MBB.addLiveIn(PreloadedPrivateBufferReg); } @@ -265,7 +265,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); } - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { // Insert copies from argument register. assert( !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) && diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4dd2932f56be..6b94333bd41e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -872,12 +872,12 @@ SDValue SITargetLowering::LowerFormalArguments( if (HasStackObjects) Info->setHasNonSpillStackObjects(true); - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { // TODO: Assume we will spill without optimizations. if (HasStackObjects) { // If we have stack objects, we unquestionably need the private buffer - // resource. For the HSA ABI, this will be the first 4 user SGPR - // inputs. We can reserve those and use them directly. + // resource. For the Code Object V2 ABI, this will be the first 4 user + // SGPR inputs. We can reserve those and use them directly. unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); @@ -1994,7 +1994,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { - if (!Subtarget->isAmdHsaOS()) { + if (!Subtarget->isAmdCodeObjectV2()) { DiagnosticInfoUnsupported BadIntrin( *MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 10f73a7f0ee2..e911817c451d 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -114,7 +114,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || MaySpill) PrivateSegmentWaveByteOffset = true; - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 6804c0f6bb7c..bcee76f52996 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -801,7 +801,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations"); + assert(ST.isAmdCodeObjectV2() && + "Non-CodeObjectV2 ABI currently uses relocations"); assert(MFI->hasPrivateSegmentBuffer()); return MFI->PrivateSegmentBufferUserSGPR; case SIRegisterInfo::KERNARG_SEGMENT_PTR: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll index c79cf8dffe0a..c108be5cd8aa 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,11 +1,12 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=MESA -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s +; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s ; ALL-LABEL: {{^}}test: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: s_load_dword s{{[0-9]+}}, s[4:5], 0xa +; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 +; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa -; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0xa +; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa define void @test(i32 addrspace(1)* %out) #1 { %kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() %header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)* @@ -17,7 +18,7 @@ define void @test(i32 addrspace(1)* %out) #1 { ; ALL-LABEL: {{^}}test_implicit: ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 -; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0x15 +; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15 define void @test_implicit(i32 addrspace(1)* %out) #1 { %implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() %header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)* @@ -28,8 +29,9 @@ define void @test_implicit(i32 addrspace(1)* %out) #1 { } ; ALL-LABEL: {{^}}test_implicit_alignment -; MESA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc +; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4 +; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; MESA: buffer_store_dword [[V_VAL]] ; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll index 75a9ec91ecaf..58529b874442 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -1,7 +1,9 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=CI-HSA %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=VI-HSA %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=CI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=VI-HSA %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=UNKNOWN-OS -check-prefix=SI-MESA %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=UNKNOWN-OS -check-prefix=VI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,SI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,VI-MESA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -9,26 +11,25 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0 ; ALL-LABEL {{^}}test_workgroup_id_x: -; HSA: .amd_kernel_code_t -; HSA: user_sgpr_count = 6 -; HSA: enable_sgpr_workgroup_id_x = 1 -; HSA: enable_sgpr_workgroup_id_y = 0 -; HSA: enable_sgpr_workgroup_id_z = 0 -; HSA: enable_sgpr_workgroup_info = 0 -; HSA: enable_vgpr_workitem_id = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 -; HSA: .end_amd_kernel_code_t +; CO-V2: .amd_kernel_code_t +; CO-V2: user_sgpr_count = 6 +; CO-V2: enable_sgpr_workgroup_id_x = 1 +; CO-V2: enable_sgpr_workgroup_id_y = 0 +; CO-V2: enable_sgpr_workgroup_id_z = 0 +; CO-V2: enable_sgpr_workgroup_info = 0 +; CO-V2: enable_vgpr_workitem_id = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 +; CO-V2: .end_amd_kernel_code_t -; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} -; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} +; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} +; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} -; ALL-NOT: [[VCOPY]] ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 @@ -40,23 +41,22 @@ define void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { } ; ALL-LABEL {{^}}test_workgroup_id_y: -; HSA: user_sgpr_count = 6 -; HSA: enable_sgpr_workgroup_id_x = 1 -; HSA: enable_sgpr_workgroup_id_y = 1 -; HSA: enable_sgpr_workgroup_id_z = 0 -; HSA: enable_sgpr_workgroup_info = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; CO-V2: user_sgpr_count = 6 +; CO-V2: enable_sgpr_workgroup_id_x = 1 +; CO-V2: enable_sgpr_workgroup_id_y = 1 +; CO-V2: enable_sgpr_workgroup_id_z = 0 +; CO-V2: enable_sgpr_workgroup_info = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 -; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} +; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} -; ALL-NOT: [[VCOPY]] ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 @@ -68,31 +68,30 @@ define void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { } ; ALL-LABEL {{^}}test_workgroup_id_z: -; HSA: user_sgpr_count = 6 -; HSA: enable_sgpr_workgroup_id_x = 1 -; HSA: enable_sgpr_workgroup_id_y = 0 -; HSA: enable_sgpr_workgroup_id_z = 1 -; HSA: enable_sgpr_workgroup_info = 0 -; HSA: enable_vgpr_workitem_id = 0 -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 0 -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: enable_sgpr_dispatch_id = 0 -; HSA: enable_sgpr_flat_scratch_init = 0 -; HSA: enable_sgpr_private_segment_size = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; CO-V2: user_sgpr_count = 6 +; CO-V2: enable_sgpr_workgroup_id_x = 1 +; CO-V2: enable_sgpr_workgroup_id_y = 0 +; CO-V2: enable_sgpr_workgroup_id_z = 1 +; CO-V2: enable_sgpr_workgroup_info = 0 +; CO-V2: enable_vgpr_workitem_id = 0 +; CO-V2: enable_sgpr_private_segment_buffer = 1 +; CO-V2: enable_sgpr_dispatch_ptr = 0 +; CO-V2: enable_sgpr_queue_ptr = 0 +; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 +; CO-V2: enable_sgpr_dispatch_id = 0 +; CO-V2: enable_sgpr_flat_scratch_init = 0 +; CO-V2: enable_sgpr_private_segment_size = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 -; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} +; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} -; ALL-NOT: [[VCOPY]] ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll index 393a593fad08..8fc12890fad0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -1,7 +1,9 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=CI-HSA %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=VI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=CI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=VI-HSA %s ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,SI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,VI-MESA %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -12,7 +14,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 ; MESA-NEXT: .long 132{{$}} ; ALL-LABEL {{^}}test_workitem_id_x: -; HSA: enable_vgpr_workitem_id = 0 +; CO-V2: enable_vgpr_workitem_id = 0 ; ALL-NOT: v0 ; ALL: {{buffer|flat}}_store_dword {{.*}}v0 @@ -27,7 +29,7 @@ define void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { ; MESA-NEXT: .long 2180{{$}} ; ALL-LABEL {{^}}test_workitem_id_y: -; HSA: enable_vgpr_workitem_id = 1 +; CO-V2: enable_vgpr_workitem_id = 1 ; ALL-NOT: v1 ; ALL: {{buffer|flat}}_store_dword {{.*}}v1 @@ -42,7 +44,7 @@ define void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { ; MESA-NEXT: .long 4228{{$}} ; ALL-LABEL {{^}}test_workitem_id_z: -; HSA: enable_vgpr_workitem_id = 2 +; CO-V2: enable_vgpr_workitem_id = 2 ; ALL-NOT: v2 ; ALL: {{buffer|flat}}_store_dword {{.*}}v2 -- 2.34.1