TEX_LOGICAL_SRC_COORD_COMPONENTS,
/** REQUIRED: Number of derivative components (as UD immediate) */
TEX_LOGICAL_SRC_GRAD_COMPONENTS,
+ /** REQUIRED: request residency (as UD immediate) */
+ TEX_LOGICAL_SRC_RESIDENCY,
TEX_LOGICAL_NUM_SRCS,
};
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM &&
- src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
+ src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM &&
+ src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
/* Texture coordinates. */
if (i == TEX_LOGICAL_SRC_COORDINATE)
return src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
}
}
+/**
+ * Return true if this logical sampler message was emitted with its
+ * TEX_LOGICAL_SRC_RESIDENCY immediate set, i.e. it also writes sparse
+ * residency data (one extra register) after the texel components.
+ *
+ * Only the sampler opcodes listed below carry the residency source;
+ * every other opcode trivially reports false.
+ */
+bool
+fs_inst::has_sampler_residency() const
+{
+   switch (opcode) {
+   case SHADER_OPCODE_TEX_LOGICAL:
+   case FS_OPCODE_TXB_LOGICAL:
+   case SHADER_OPCODE_TXL_LOGICAL:
+   case SHADER_OPCODE_TXD_LOGICAL:
+   case SHADER_OPCODE_TXF_LOGICAL:
+   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
+   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
+   case SHADER_OPCODE_TXF_CMS_LOGICAL:
+   case SHADER_OPCODE_TXS_LOGICAL:
+   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+   case SHADER_OPCODE_TG4_LOGICAL:
+      /* The residency request is always an immediate on these messages. */
+      assert(src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
+      return src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
+   default:
+      return false;
+   }
+}
+
fs_reg
fs_visitor::vgrf(const glsl_type *const type)
{
/* Specified channel group from the destination region. */
const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group);
- const unsigned dst_size = inst->size_written /
- inst->dst.component_size(inst->exec_size);
- if (needs_dst_copy(lbld_after, inst)) {
- const fs_reg tmp = lbld_after.vgrf(inst->dst.type, dst_size);
+ if (!needs_dst_copy(lbld_after, inst)) {
+ /* No need to allocate a temporary for the lowered instruction, just
+ * take the right group of channels from the original region.
+ */
+ return dst;
+ }
+
+ /* Deal with the residency data part later */
+ const unsigned residency_size = inst->has_sampler_residency() ? REG_SIZE : 0;
+ const unsigned dst_size = (inst->size_written - residency_size) /
+ inst->dst.component_size(inst->exec_size);
- if (inst->predicate) {
- /* Handle predication by copying the original contents of
- * the destination into the temporary before emitting the
- * lowered instruction.
- */
- const fs_builder gbld_before =
- lbld_before.group(MIN2(lbld_before.dispatch_width(),
- inst->exec_size), 0);
- for (unsigned k = 0; k < dst_size; ++k) {
- gbld_before.MOV(offset(tmp, lbld_before, k),
- offset(dst, inst->exec_size, k));
- }
- }
+ const fs_reg tmp = lbld_after.vgrf(inst->dst.type,
+ dst_size + inst->has_sampler_residency());
- const fs_builder gbld_after =
- lbld_after.group(MIN2(lbld_after.dispatch_width(),
- inst->exec_size), 0);
+ if (inst->predicate) {
+ /* Handle predication by copying the original contents of the
+ * destination into the temporary before emitting the lowered
+ * instruction.
+ */
+ const fs_builder gbld_before =
+ lbld_before.group(MIN2(lbld_before.dispatch_width(),
+ inst->exec_size), 0);
for (unsigned k = 0; k < dst_size; ++k) {
- /* Use a builder of the right width to perform the copy avoiding
- * uninitialized data if the lowered execution size is greater than
- * the original execution size of the instruction.
- */
- gbld_after.MOV(offset(dst, inst->exec_size, k),
- offset(tmp, lbld_after, k));
+ gbld_before.MOV(offset(tmp, lbld_before, k),
+ offset(dst, inst->exec_size, k));
}
+ }
- return tmp;
+ const fs_builder gbld_after =
+ lbld_after.group(MIN2(lbld_after.dispatch_width(),
+ inst->exec_size), 0);
+ for (unsigned k = 0; k < dst_size; ++k) {
+ /* Use a builder of the right width to perform the copy avoiding
+ * uninitialized data if the lowered execution size is greater than the
+ * original execution size of the instruction.
+ */
+ gbld_after.MOV(offset(dst, inst->exec_size, k),
+ offset(tmp, lbld_after, k));
+ }
- } else {
- /* No need to allocate a temporary for the lowered instruction, just
- * take the right group of channels from the original region.
+ if (inst->has_sampler_residency()) {
+      /* Sampler messages with residency need special attention. In the
+ * first lane of the last component are located the Pixel Null Mask
+ * (bits 0:15) & some upper bits we need to discard (bits 16:31). We
+ * have to build a single 32bit value for the SIMD32 message out of 2
+ * SIMD16 16 bit values.
*/
- return dst;
+ const fs_builder rbld = gbld_after.exec_all().group(1, 0);
+ fs_reg local_res_reg = component(
+ retype(offset(tmp, lbld_before, dst_size),
+ BRW_REGISTER_TYPE_UW), 0);
+ fs_reg final_res_reg =
+ retype(byte_offset(inst->dst,
+ inst->size_written - residency_size +
+ gbld_after.group() / 8),
+ BRW_REGISTER_TYPE_UW);
+ rbld.MOV(final_res_reg, local_res_reg);
}
+
+ return tmp;
}
bool
* original or the lowered instruction, whichever is lower.
*/
const unsigned n = DIV_ROUND_UP(inst->exec_size, lower_width);
- const unsigned dst_size = inst->size_written /
+ const unsigned residency_size =
+ inst->has_sampler_residency() ? REG_SIZE : 0;
+ const unsigned dst_size =
+ (inst->size_written - residency_size) /
inst->dst.component_size(inst->exec_size);
assert(!inst->writes_accumulator && !inst->mlen);
split_inst.dst = emit_zip(lbld.at(block, inst),
lbld.at(block, after_inst), inst);
split_inst.size_written =
- split_inst.dst.component_size(lower_width) * dst_size;
+ split_inst.dst.component_size(lower_width) * dst_size +
+ residency_size;
lbld.at(block, inst->next).emit(split_inst);
}
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(3);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
+ srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
+ srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0);
/* Since the image size is always uniform, we can just emit a SIMD8
* query instruction and splat the result out.
{
fs_reg srcs[TEX_LOGICAL_NUM_SRCS];
+ /* SKL PRMs: Volume 7: 3D-Media-GPGPU:
+ *
+ * "The Pixel Null Mask field, when enabled via the Pixel Null Mask
+ * Enable will be incorect for sample_c when applied to a surface with
+ * 64-bit per texel format such as R16G16BA16_UNORM. Pixel Null mask
+ * Enable may incorrectly report pixels as referencing a Null surface."
+ *
+ * We'll take care of this in NIR.
+ */
+ assert(!instr->is_sparse || srcs[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE);
+
+ srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(instr->is_sparse);
+
int lod_components = 0;
/* The hardware requires a LOD for buffer textures */
}
}
- fs_reg dst = bld.vgrf(brw_type_for_nir_type(devinfo, instr->dest_type), 4);
+ fs_reg dst = bld.vgrf(brw_type_for_nir_type(devinfo, instr->dest_type), 4 + instr->is_sparse);
fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
inst->offset = header_bits;
assert(instr->dest.is_ssa);
unsigned write_mask = nir_ssa_def_components_read(&instr->dest.ssa);
assert(write_mask != 0); /* dead code should have been eliminated */
- inst->size_written = util_last_bit(write_mask) *
- inst->dst.component_size(inst->exec_size);
+ if (instr->is_sparse) {
+ inst->size_written = (util_last_bit(write_mask) - 1) *
+ inst->dst.component_size(inst->exec_size) +
+ REG_SIZE;
+ } else {
+ inst->size_written = util_last_bit(write_mask) *
+ inst->dst.component_size(inst->exec_size);
+ }
} else {
- inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
+ inst->size_written = 4 * inst->dst.component_size(inst->exec_size) +
+ (instr->is_sparse ? REG_SIZE : 0);
}
if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
}
+   /* The residency bits are only in the first channel of the last component. */
+ if (instr->is_sparse)
+ nir_dest[dest_size - 1] = component(offset(dst, bld, dest_size - 1), 0);
+
bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0);
}
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle;
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
+ srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0);
fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs));
#include "compiler/glsl/list.h"
#define MAX_SAMPLER_MESSAGE_SIZE 11
-#define MAX_VGRF_SIZE 16
+
+/* The sampler can return a vec5 when sampling with sparse residency. In
+ * SIMD32, each component takes up 4 GRFs, so we need to allow up to size-20
+ * VGRFs to hold the result.
+ */
+#define MAX_VGRF_SIZE 20
#ifdef __cplusplus
struct backend_reg : private brw_reg
*/
unsigned flags_written(const intel_device_info *devinfo) const;
+ /**
+ * Return true if this instruction is a sampler message gathering residency
+ * data.
+ */
+ bool has_sampler_residency() const;
+
fs_reg dst;
fs_reg *src;
const fs_reg &tg4_offset,
unsigned payload_type_bit_size,
unsigned coord_components,
- unsigned grad_components)
+ unsigned grad_components,
+ bool residency)
{
const brw_compiler *compiler = bld.shader->compiler;
const intel_device_info *devinfo = bld.shader->devinfo;
inst->offset != 0 || inst->eot ||
op == SHADER_OPCODE_SAMPLEINFO ||
sampler_handle.file != BAD_FILE ||
- is_high_sampler(devinfo, sampler)) {
+ is_high_sampler(devinfo, sampler) ||
+ residency) {
/* For general texture offsets (no txf workaround), we need a header to
* put them in.
*
* and we have an explicit header, we need to set up the sampler
* writemask. It's reversed from normal: 1 means "don't write".
*/
- if (!inst->eot && regs_written(inst) != 4 * reg_width) {
- assert(regs_written(inst) % reg_width == 0);
- unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
+ unsigned reg_count = regs_written(inst) - residency;
+ if (!inst->eot && reg_count < 4 * reg_width) {
+ assert(reg_count % reg_width == 0);
+ unsigned mask = ~((1 << (reg_count / reg_width)) - 1) & 0xf;
inst->offset |= mask << 12;
}
+ if (residency)
+ inst->offset |= 1 << 23; /* g0.2 bit23 : Pixel Null Mask Enable */
+
/* Build the actual header */
const fs_builder ubld = bld.exec_all().group(8, 0);
const fs_builder ubld1 = ubld.group(1, 0);
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
+ assert(inst->src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
+ const bool residency = inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
+ /* residency is only supported on Gfx8+ */
+ assert(!residency || devinfo->ver >= 8);
if (devinfo->ver >= 7) {
const unsigned msg_payload_type_bit_size =
surface_handle, sampler_handle,
tg4_offset,
msg_payload_type_bit_size,
- coord_components, grad_components);
+ coord_components, grad_components,
+ residency);
} else if (devinfo->ver >= 5) {
lower_sampler_logical_send_gfx5(bld, inst, op, coordinate,
shadow_c, lod, lod2, sample_index,