}
static inline uint32_t
+lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
+ enum lsc_fence_scope scope,
+ enum lsc_flush_type flush_type,
+ bool route_to_lsc)
+{ /* Builds the 32-bit descriptor for an LSC fence message by OR-ing the fields below; assumes SET_BITS(value, high, low) - TODO confirm. */
+ assert(devinfo->has_lsc); /* devinfo is referenced only by this assert */
+ return SET_BITS(LSC_OP_FENCE, 5, 0) | /* opcode is always FENCE */
+ SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) | /* address size is fixed to A32 */
+ SET_BITS(scope, 11, 9) | /* caller-selected fence scope */
+ SET_BITS(flush_type, 14, 12) | /* caller-selected flush type */
+ SET_BITS(route_to_lsc, 18, 18) | /* single-bit flag taken from route_to_lsc */
+ SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29); /* surface type is fixed to FLAT */
+}
+
+static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
assert(exec_size == 8 || exec_size == 16);
brw_inst_set_binding_table_index(devinfo, insn, bti);
}
+static void
+gfx12_set_memory_fence_message(struct brw_codegen *p,
+ struct brw_inst *insn,
+ enum brw_message_target sfid)
+{ /* Sets the SFID and the message descriptor on insn for an LSC fence sent to sfid. */
+ const unsigned mlen = 1; /* g0 header */
+ /* Completion signaled by write to register. No data returned. */
+ const unsigned rlen = 1;
+
+ brw_inst_set_sfid(p->devinfo, insn, sfid); /* store the target sfid on the instruction */
+
+ enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP; /* default for every sfid other than TGM */
+ enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE; /* default for every sfid other than TGM */
+
+ if (sfid == GFX12_SFID_TGM) { /* TGM fences use GPU scope with an evict flush instead */
+ scope = LSC_FENCE_GPU;
+ flush_type = LSC_FLUSH_TYPE_EVICT;
+ }
+
+ brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
+ flush_type, false) | /* route_to_lsc is always false here */
+ brw_message_desc(p->devinfo, mlen, rlen, false)); /* OR-ed with the generic descriptor; last argument is presumably header_present - TODO confirm */
+}
+
void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
+
+ /* All DG2 hardware requires LSC for fence messages, even A-step */
+ if (devinfo->has_lsc)
+ gfx12_set_memory_fence_message(p, insn, sfid);
+ else
+ brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
void
case nir_intrinsic_memory_barrier:
case nir_intrinsic_begin_invocation_interlock:
case nir_intrinsic_end_invocation_interlock: {
- bool l3_fence, slm_fence;
+ bool l3_fence, slm_fence, tgm_fence = false;
const enum opcode opcode =
instr->intrinsic == nir_intrinsic_begin_invocation_interlock ?
SHADER_OPCODE_INTERLOCK : SHADER_OPCODE_MEMORY_FENCE;
nir_var_mem_ssbo |
nir_var_mem_global);
slm_fence = modes & nir_var_mem_shared;
+
+ /* NIR currently doesn't have an image mode */
+ if (devinfo->has_lsc)
+ tgm_fence = modes & nir_var_mem_ssbo;
break;
}
slm_fence = instr->intrinsic == nir_intrinsic_group_memory_barrier ||
instr->intrinsic == nir_intrinsic_memory_barrier ||
instr->intrinsic == nir_intrinsic_memory_barrier_shared;
+ tgm_fence = instr->intrinsic == nir_intrinsic_memory_barrier_image;
break;
}
devinfo->ver >= 10; /* HSD ES # 1404612949 */
unsigned fence_regs_count = 0;
- fs_reg fence_regs[2] = {};
+ fs_reg fence_regs[3] = {};
const fs_builder ubld = bld.group(8, 0);
ubld.vgrf(BRW_REGISTER_TYPE_UD),
brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable),
- brw_imm_ud(/* bti */ 0));
- fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
+ brw_imm_ud(0 /* BTI; ignored for LSC */));
+
+ fence->sfid = devinfo->has_lsc ?
+ GFX12_SFID_UGM :
+ GFX7_SFID_DATAPORT_DATA_CACHE;
fence_regs[fence_regs_count++] = fence->dst;
fence_regs[fence_regs_count++] = render_fence->dst;
}
+
+ /* Translate l3_fence into untyped and typed fence on XeHP */
+ if (devinfo->has_lsc && tgm_fence) {
+ fs_inst *fence =
+ ubld.emit(opcode,
+ ubld.vgrf(BRW_REGISTER_TYPE_UD),
+ brw_vec8_grf(0, 0),
+ brw_imm_ud(commit_enable),
+ brw_imm_ud(/* ignored */0));
+
+ fence->sfid = GFX12_SFID_TGM;
+ fence_regs[fence_regs_count++] = fence->dst;
+ }
}
if (slm_fence) {
ubld.vgrf(BRW_REGISTER_TYPE_UD),
brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable),
- brw_imm_ud(GFX7_BTI_SLM));
- fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
+ brw_imm_ud(GFX7_BTI_SLM /* ignored for LSC */));
+ if (devinfo->has_lsc)
+ fence->sfid = GFX12_SFID_SLM;
+ else
+ fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
fence_regs[fence_regs_count++] = fence->dst;
}
- assert(fence_regs_count <= 2);
+ assert(fence_regs_count <= 3);
if (stall || fence_regs_count == 0) {
ubld.exec_all().group(1, 0).emit(
abort();
case GFX7_SFID_DATAPORT_DATA_CACHE:
+ case GFX12_SFID_SLM:
+ case GFX12_SFID_TGM:
+ case GFX12_SFID_UGM:
case HSW_SFID_DATAPORT_DATA_CACHE_1:
if (devinfo->ver >= 7)
return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,