From dd8fd9dcfdc244184a591c0f2d2c034857641c9f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 8 Apr 2017 21:28:38 +0000 Subject: [PATCH] AMDGPU: Actually write nops for writeNopData Before, this was just writing 0s, which ends up looking like a v_cndmask_b32 v0, s0, v0, vcc. Write out an encoded s_nop instead. llvm-svn: 299816 --- .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 15 +++- llvm/test/CodeGen/AMDGPU/nop-data.ll | 87 ++++++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/nop-data.ll diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 5a7790f..f3266fe 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -164,7 +164,20 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( } bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - OW->WriteZeros(Count); + // If the count is not 4-byte aligned, we must be writing data into the text + // section (otherwise we have unaligned instructions, and thus have far + // bigger problems), so just write zeros instead. + OW->WriteZeros(Count % 4); + + // We are properly aligned, so write NOPs as requested. + Count /= 4; + + // FIXME: R600 support. 
+ // s_nop 0 + const uint32_t Encoded_S_NOP_0 = 0xbf800000; + + for (uint64_t I = 0; I != Count; ++I) + OW->write32(Encoded_S_NOP_0); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll new file mode 100644 index 0000000..b68f343 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -0,0 +1,87 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - -mcpu=fiji | FileCheck %s + +; CHECK: kernel0: +; CHECK-NEXT: s_endpgm +define amdgpu_kernel void @kernel0() align 256 { +entry: + ret void +} + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: 
s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 // 0000000001FC: BF800000 + +; CHECK-NEXT: {{^$}} +; CHECK-NEXT: kernel1: +; CHECK-NEXT: s_endpgm +define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(2)* %ptr.out) align 256 { +entry: + ret void +} -- 2.7.4