struct amdgpu_winsys *ws = cs->ws;
int error_code = 0;
uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type];
- unsigned alignment = ws->info.ip[cs->ip_type].ib_size_alignment / 4;
rcs->current.max_dw += amdgpu_cs_epilog_dws(cs);
break;
case AMD_IP_GFX:
case AMD_IP_COMPUTE:
- if (rcs->current.cdw % alignment) {
- int remaining = alignment - rcs->current.cdw % alignment;
-
- /* Only pad by 1 dword with the type-2 NOP if necessary. */
- if (remaining == 1 && ws->info.gfx_ib_pad_with_type2) {
+ if (ws->info.gfx_ib_pad_with_type2) {
+ while (rcs->current.cdw & ib_pad_dw_mask)
radeon_emit(rcs, PKT2_NOP_PAD);
- } else {
- /* Pad with a single NOP packet to minimize CP overhead because NOP is a variable-sized
- * packet. The size of the packet body after the header is always count + 1.
- * If count == -1, there is no packet body. NOP is the only packet that can have
- * count == -1, which is the definition of PKT3_NOP_PAD (count == 0x3fff means -1).
- */
- radeon_emit(rcs, PKT3(PKT3_NOP, remaining - 2, 0));
- rcs->current.cdw += remaining - 1;
- }
+ } else {
+ while (rcs->current.cdw & ib_pad_dw_mask)
+ radeon_emit(rcs, PKT3_NOP_PAD);
}
- assert(rcs->current.cdw % alignment == 0);
if (cs->ip_type == AMD_IP_GFX)
ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
break;