intel_set_ps_dispatch_state(&ps, &batch->screen->devinfo,
wm_prog_data,
- ice->state.framebuffer.samples);
+ ice->state.framebuffer.samples,
+ 0 /* msaa_flags */);
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
- psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+ psx.PixelShaderIsPerSample =
+ brw_wm_prog_data_is_persample(wm_prog_data, 0);
/* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
if (wm_prog_data->uses_sample_mask)
else
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
- if (wm_prog_data->persample_dispatch)
+ if (brw_wm_prog_data_is_persample(wm_prog_data, 0))
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
else
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
- psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+ psx.PixelShaderIsPerSample =
+ brw_wm_prog_data_is_persample(wm_prog_data, 0);
psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
#if GFX_VER >= 9
uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
_iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) {
intel_set_ps_dispatch_state(&ps, batch->screen->devinfo,
- wm_prog_data, cso_fb->samples);
+ wm_prog_data, cso_fb->samples,
+ 0 /* msaa_flags */);
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
if (prog_data) {
intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
- params->num_samples);
+ params->num_samples,
+ 0 /* msaa_flags */);
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
if (prog_data) {
intel_set_ps_dispatch_state(&ps, devinfo, prog_data,
- params->num_samples);
+ params->num_samples,
+ 0 /* msaa_flags */);
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
const struct intel_device_info *devinfo,
const struct brw_wm_prog_data *prog_data,
- unsigned rasterization_samples)
+ unsigned rasterization_samples,
+ enum brw_wm_msaa_flags msaa_flags)
{
assert(rasterization_samples != 0);
enable_8 = false;
#endif
- if (prog_data->persample_dispatch) {
+ const bool is_persample_dispatch =
+ brw_wm_prog_data_is_persample(prog_data, msaa_flags);
+
+ if (is_persample_dispatch) {
/* TGL PRMs, Volume 2d: Command Reference: Structures:
* 3DSTATE_PS_BODY::32 Pixel Dispatch Enable:
*
*
* 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
*/
- if (GFX_VER >= 9 && rasterization_samples == 16 &&
- !prog_data->persample_dispatch) {
+ if (GFX_VER >= 9 && rasterization_samples == 16 && !is_persample_dispatch) {
assert(enable_8 || enable_16);
enable_32 = false;
}
#include "c11/threads.h"
#include "dev/intel_device_info.h"
#include "util/macros.h"
+#include "util/enum_operators.h"
#include "util/ralloc.h"
#include "util/u_math.h"
#include "brw_isa_info.h"
BRW_ALWAYS
};
+static inline enum brw_sometimes
+brw_sometimes_invert(enum brw_sometimes x)
+{
+ return (enum brw_sometimes)((int)BRW_ALWAYS - (int)x);
+}
+
/** The program key for Fragment/Pixel Shaders. */
struct brw_wm_prog_key {
struct brw_base_prog_key base;
BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
};
+enum brw_wm_msaa_flags {
+ /** Must be set whenever any dynamic MSAA is used
+ *
+ * This flag mostly exists to let us assert that the driver understands
+ * dynamic MSAA so we don't run into trouble with drivers that don't.
+ */
+ BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC = (1 << 0),
+
+ /** True if the framebuffer is multisampled */
+ BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO = (1 << 1),
+
+ /** True if this shader has been dispatched per-sample */
+ BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),
+
+ /** True if this shader has been dispatched coarse
+ *
+ * This is intentionally chose to be bit 18 to correspond to the coarse
+ * write bit in the FB write message descriptor.
+ */
+ BRW_WM_MSAA_FLAG_COARSE_DISPATCH = (1 << 18),
+};
+MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(enum brw_wm_msaa_flags)
+
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
bool dispatch_16;
bool dispatch_32;
bool dual_src_blend;
- bool persample_dispatch;
bool uses_pos_offset;
bool uses_omask;
bool uses_kill;
bool contains_flat_varying;
bool contains_noperspective_varying;
+ /** True if the shader wants sample shading
+ *
+ * This corresponds to whether or not a gl_SampleId, gl_SamplePosition, or
+ * a sample-qualified input are used in the shader. It is independent of
+ * GL_MIN_SAMPLE_SHADING_VALUE in GL or minSampleShading in Vulkan.
+ */
+ bool sample_shading;
+
+ /** Should this shader be dispatched per-sample */
+ enum brw_sometimes persample_dispatch;
+
/**
* Shader is ran at the coarse pixel shading dispatch rate (3DSTATE_CPS).
*/
- bool per_coarse_pixel_dispatch;
+ enum brw_sometimes coarse_pixel_dispatch;
+
+ unsigned msaa_flags_param;
/**
* Mask of which interpolation modes are required by the fragment shader.
_brw_wm_prog_data_reg_blocks(prog_data, \
brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx))
+static inline bool
+brw_wm_prog_data_is_persample(const struct brw_wm_prog_data *prog_data,
+ enum brw_wm_msaa_flags pushed_msaa_flags)
+{
+ if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) {
+ if (!(pushed_msaa_flags & BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO))
+ return false;
+
+ if (prog_data->sample_shading)
+ assert(pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
+
+ if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH)
+ assert(prog_data->persample_dispatch != BRW_NEVER);
+ else
+ assert(prog_data->persample_dispatch != BRW_ALWAYS);
+
+ return (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH) != 0;
+ }
+
+ assert(prog_data->persample_dispatch == BRW_ALWAYS ||
+ prog_data->persample_dispatch == BRW_NEVER);
+
+ return prog_data->persample_dispatch;
+}
+
+static inline bool
+brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
+ enum brw_wm_msaa_flags pushed_msaa_flags)
+{
+ if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) {
+ if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_COARSE_DISPATCH)
+ assert(prog_data->coarse_pixel_dispatch != BRW_NEVER);
+ else
+ assert(prog_data->coarse_pixel_dispatch != BRW_ALWAYS);
+
+ return pushed_msaa_flags & BRW_WM_MSAA_FLAG_COARSE_DISPATCH;
+ }
+
+ assert(prog_data->coarse_pixel_dispatch == BRW_ALWAYS ||
+ prog_data->coarse_pixel_dispatch == BRW_NEVER);
+
+ return prog_data->coarse_pixel_dispatch;
+}
+
struct brw_push_const_block {
unsigned dwords; /* Dword count, not reg aligned */
unsigned regs;
const fs_builder abld = bld.annotate("compute sample position");
fs_reg pos = abld.vgrf(BRW_REGISTER_TYPE_F, 2);
- if (!wm_prog_data->persample_dispatch) {
+ if (wm_prog_data->persample_dispatch == BRW_NEVER) {
/* From ARB_sample_shading specification:
* "When rendering to a non-multisample buffer, or if multisample
* rasterization is disabled, gl_SamplePosition will always be
abld.MUL(offset(pos, abld, i), tmp_f, brw_imm_f(1 / 16.0f));
}
+ if (wm_prog_data->persample_dispatch == BRW_SOMETIMES) {
+ check_dynamic_msaa_flag(abld, wm_prog_data,
+ BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
+ for (unsigned i = 0; i < 2; i++) {
+ set_predicate(BRW_PREDICATE_NORMAL,
+ bld.SEL(offset(pos, abld, i), offset(pos, abld, i),
+ brw_imm_f(0.5f)));
+ }
+ }
+
return pos;
}
assert(devinfo->ver >= 6);
/* The HW doesn't provide us with expected values. */
- assert(!wm_prog_data->per_coarse_pixel_dispatch);
+ assert(wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS);
fs_reg coverage_mask =
fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D);
- if (!wm_prog_data->persample_dispatch)
+ if (wm_prog_data->persample_dispatch == BRW_NEVER)
return coverage_mask;
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_D);
abld.AND(mask, enabled_mask, coverage_mask);
+ if (wm_prog_data->persample_dispatch == BRW_ALWAYS)
+ return mask;
+
+ check_dynamic_msaa_flag(abld, wm_prog_data,
+ BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH);
+ set_predicate(BRW_PREDICATE_NORMAL, abld.SEL(mask, mask, coverage_mask));
+
return mask;
}
/* Coarse pixel shading size fields overlap with other fields of not in
* coarse pixel dispatch mode, so report 0 when that's not the case.
*/
- if (!wm_prog_data->per_coarse_pixel_dispatch)
+ if (wm_prog_data->coarse_pixel_dispatch == BRW_NEVER)
return brw_imm_ud(0);
const fs_builder abld = bld.annotate("compute fragment shading rate");
fs_reg rate = abld.vgrf(BRW_REGISTER_TYPE_UD);
abld.OR(rate, int_rate_x, int_rate_y);
+ if (wm_prog_data->coarse_pixel_dispatch == BRW_ALWAYS)
+ return rate;
+
+ check_dynamic_msaa_flag(abld, wm_prog_data,
+ BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
+ set_predicate(BRW_PREDICATE_NORMAL, abld.SEL(rate, rate, brw_imm_ud(0)));
+
return rate;
}
prog_data->computed_stencil =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
- prog_data->persample_dispatch =
- key->multisample_fbo &&
- (key->persample_interp ||
- shader->info.fs.uses_sample_shading);
+ prog_data->sample_shading =
+ shader->info.fs.uses_sample_shading ||
+ shader->info.outputs_read;
+
+ prog_data->persample_dispatch = BRW_NEVER;
+ if (key->multisample_fbo &&
+ (key->persample_interp || prog_data->sample_shading))
+ prog_data->persample_dispatch = BRW_ALWAYS;
if (devinfo->ver >= 6) {
prog_data->uses_sample_mask =
* per-sample dispatch. If we need gl_SamplePosition and we don't have
* persample dispatch, we hard-code it to 0.5.
*/
- prog_data->uses_pos_offset = prog_data->persample_dispatch &&
+ prog_data->uses_pos_offset =
+ prog_data->persample_dispatch != BRW_NEVER &&
(BITSET_TEST(shader->info.system_values_read,
SYSTEM_VALUE_SAMPLE_POS) ||
BITSET_TEST(shader->info.system_values_read,
/* You can't be coarse and per-sample */
assert(!key->coarse_pixel || !key->persample_interp);
- prog_data->per_coarse_pixel_dispatch =
- key->coarse_pixel &&
- !shader->info.fs.uses_sample_shading &&
- !prog_data->uses_omask &&
- !prog_data->uses_sample_mask &&
- (prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) &&
- !prog_data->computed_stencil;
+ prog_data->coarse_pixel_dispatch =
+ brw_sometimes_invert(prog_data->persample_dispatch);
+ if (!key->coarse_pixel ||
+ prog_data->uses_omask ||
+ prog_data->sample_shading ||
+ prog_data->uses_sample_mask ||
+ (prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) ||
+ prog_data->computed_stencil) {
+ prog_data->coarse_pixel_dispatch = BRW_NEVER;
+ }
/* We choose to always enable VMask prior to XeHP, as it would cause
* us to lose out on the eliminate_find_live_channel() optimization.
prog_data->uses_vmask = devinfo->verx10 < 125 ||
shader->info.fs.needs_quad_helper_invocations ||
shader->info.fs.needs_all_helper_invocations ||
- prog_data->per_coarse_pixel_dispatch;
+ prog_data->coarse_pixel_dispatch != BRW_NEVER;
prog_data->uses_src_w =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
prog_data->uses_src_depth =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
- !prog_data->per_coarse_pixel_dispatch;
+ prog_data->coarse_pixel_dispatch != BRW_ALWAYS;
prog_data->uses_depth_w_coefficients =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) &&
- prog_data->per_coarse_pixel_dispatch;
+ prog_data->coarse_pixel_dispatch != BRW_NEVER;
calculate_urb_setup(devinfo, key, prog_data, shader, mue_map);
brw_compute_flat_inputs(prog_data, shader);
return tmp;
}
+ inline fs_reg
+ dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
+ {
+ return fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
+ BRW_REGISTER_TYPE_UD);
+ }
+
+ inline void
+ check_dynamic_msaa_flag(const fs_builder &bld,
+ const struct brw_wm_prog_data *wm_prog_data,
+ enum brw_wm_msaa_flags flag)
+ {
+ fs_inst *inst = bld.AND(bld.null_reg_ud(),
+ dynamic_msaa_flags(wm_prog_data),
+ brw_imm_ud(flag));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ }
+
bool
lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
}
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
- fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
- fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
- fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
- if (!wm_prog_data->per_coarse_pixel_dispatch) {
+ fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
+ fs_reg int_sample_offset_xy; /* Used on Gen8+ */
+ fs_reg half_int_sample_offset_x, half_int_sample_offset_y;
+ if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
/* The thread payload only delivers subspan locations (ss0, ss1,
* ss2, ...). Since subspans covers 2x2 pixels blocks, we need to
* generate 4 pixel coordinates out of each subspan location. We do this
* coordinates out of 2 subspans coordinates in a single ADD instruction
* (twice the operation above).
*/
- int_pixel_offset_xy = fs_reg(brw_imm_v(0x11001010));
- half_int_pixel_offset_x = fs_reg(brw_imm_uw(0));
- half_int_pixel_offset_y = fs_reg(brw_imm_uw(0));
+ int_sample_offset_xy = fs_reg(brw_imm_v(0x11001010));
+ half_int_sample_offset_x = fs_reg(brw_imm_uw(0));
+ half_int_sample_offset_y = fs_reg(brw_imm_uw(0));
/* On Gfx12.5, because of regioning restrictions, the interpolation code
* is slightly different and works off X & Y only inputs. The ordering
* of the half bytes here is a bit odd, with each subspan replicated
* X offset: 0 0 1 0 0 0 1 0
* Y offset: 0 0 0 0 1 0 1 0
*/
- int_pixel_offset_x = fs_reg(brw_imm_v(0x01000100));
- int_pixel_offset_y = fs_reg(brw_imm_v(0x01010000));
- } else {
+ int_sample_offset_x = fs_reg(brw_imm_v(0x01000100));
+ int_sample_offset_y = fs_reg(brw_imm_v(0x01010000));
+ }
+
+ fs_reg int_coarse_offset_x, int_coarse_offset_y; /* Used on Gen12HP+ */
+ fs_reg int_coarse_offset_xy; /* Used on Gen8+ */
+ fs_reg half_int_coarse_offset_x, half_int_coarse_offset_y;
+ if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
/* In coarse pixel dispatch we have to do the same ADD instruction that
* we do in normal per pixel dispatch, except this time we're not adding
* 1 in each direction, but instead the coarse pixel size.
/* To build the array of half bytes we do and AND operation with the
* right mask in X.
*/
- int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
- dbld.AND(int_pixel_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0f000f00));
+ int_coarse_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ dbld.AND(int_coarse_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0f000f00));
/* And the right mask in Y. */
- int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
- dbld.AND(int_pixel_offset_y, byte_offset(r1_0, 1), brw_imm_v(0x0f0f0000));
+ int_coarse_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ dbld.AND(int_coarse_offset_y, byte_offset(r1_0, 1), brw_imm_v(0x0f0f0000));
} else {
/* To build the array of half bytes we do and AND operation with the
* right mask in X.
*/
- int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
- dbld.AND(int_pixel_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0000f0f0));
+ int_coarse_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ dbld.AND(int_coarse_offset_x, byte_offset(r1_0, 0), brw_imm_v(0x0000f0f0));
/* And the right mask in Y. */
- int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
- dbld.AND(int_pixel_offset_y, byte_offset(r1_0, 1), brw_imm_v(0xff000000));
+ int_coarse_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ dbld.AND(int_coarse_offset_y, byte_offset(r1_0, 1), brw_imm_v(0xff000000));
/* Finally OR the 2 registers. */
- int_pixel_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
- dbld.OR(int_pixel_offset_xy, int_pixel_offset_x, int_pixel_offset_y);
+ int_coarse_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ dbld.OR(int_coarse_offset_xy, int_coarse_offset_x, int_coarse_offset_y);
}
- /* Also compute the half pixel size used to center pixels. */
- half_int_pixel_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
- half_int_pixel_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
+ /* Also compute the half coarse size used to center coarses. */
+ half_int_coarse_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
+ half_int_coarse_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
+
+ bld.SHR(half_int_coarse_offset_x, suboffset(r1_0, 0), brw_imm_ud(1));
+ bld.SHR(half_int_coarse_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
+ }
+
+ fs_reg int_pixel_offset_x, int_pixel_offset_y; /* Used on Gen12HP+ */
+ fs_reg int_pixel_offset_xy; /* Used on Gen8+ */
+ fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
+ switch (wm_prog_data->coarse_pixel_dispatch) {
+ case BRW_NEVER:
+#define COPY_OFFSET_REG(prefix, suffix) \
+ prefix##_pixel_##suffix = prefix##_sample_##suffix;
+
+ COPY_OFFSET_REG(int, offset_x)
+ COPY_OFFSET_REG(int, offset_y)
+ COPY_OFFSET_REG(int, offset_xy)
+ COPY_OFFSET_REG(half_int, offset_x)
+ COPY_OFFSET_REG(half_int, offset_y)
+
+#undef COPY_OFFSET_REG
+ break;
+
+ case BRW_SOMETIMES:
+ check_dynamic_msaa_flag(bld, wm_prog_data,
+ BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
+
+#define COPY_OFFSET_REG(prefix, suffix) \
+ prefix##_pixel_##suffix = bld.vgrf(BRW_REGISTER_TYPE_UW); \
+ bld.SEL(prefix##_pixel_##suffix, \
+ prefix##_coarse_##suffix, \
+ prefix##_pixel_##suffix); \
+
+ COPY_OFFSET_REG(int, offset_x)
+ COPY_OFFSET_REG(int, offset_y)
+ COPY_OFFSET_REG(int, offset_xy)
+ COPY_OFFSET_REG(half_int, offset_x)
+ COPY_OFFSET_REG(half_int, offset_y)
- bld.SHR(half_int_pixel_offset_x, suboffset(r1_0, 0), brw_imm_ud(1));
- bld.SHR(half_int_pixel_offset_y, suboffset(r1_0, 1), brw_imm_ud(1));
+#undef COPY_OFFSET_REG
+ break;
+
+ case BRW_ALWAYS:
+#define COPY_OFFSET_REG(prefix, suffix) \
+ prefix##_pixel_##suffix = prefix##_coarse_##suffix;
+
+ COPY_OFFSET_REG(int, offset_x)
+ COPY_OFFSET_REG(int, offset_y)
+ COPY_OFFSET_REG(int, offset_xy)
+ COPY_OFFSET_REG(half_int, offset_x)
+ COPY_OFFSET_REG(half_int, offset_y)
+
+#undef COPY_OFFSET_REG
+ break;
}
for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) {
fs_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
int_pixel_offset_y);
- if (wm_prog_data->per_coarse_pixel_dispatch) {
- dbld.ADD(int_pixel_x, int_pixel_x,
- horiz_stride(half_int_pixel_offset_x, 0));
- dbld.ADD(int_pixel_y, int_pixel_y,
- horiz_stride(half_int_pixel_offset_y, 0));
+ if (wm_prog_data->coarse_pixel_dispatch != BRW_NEVER) {
+ fs_inst *addx = dbld.ADD(int_pixel_x, int_pixel_x,
+ horiz_stride(half_int_pixel_offset_x, 0));
+ fs_inst *addy = dbld.ADD(int_pixel_y, int_pixel_y,
+ horiz_stride(half_int_pixel_offset_y, 0));
+ if (wm_prog_data->coarse_pixel_dispatch != BRW_ALWAYS) {
+ addx->predicate = BRW_PREDICATE_NORMAL;
+ addy->predicate = BRW_PREDICATE_NORMAL;
+ }
}
hbld.MOV(offset(pixel_x, hbld, i), horiz_stride(int_pixel_x, 2));
}
abld = bld.annotate("compute pos.z");
+ fs_reg coarse_z;
if (wm_prog_data->uses_depth_w_coefficients) {
- assert(!wm_prog_data->uses_src_depth);
/* In coarse pixel mode, the HW doesn't interpolate Z coordinate
* properly. In the same way we have to add the coarse pixel size to
* pixels locations, here we recompute the Z value with 2 coefficients
abld.MAD(float_pixel_x, float_pixel_x, brw_imm_f(0.5f), f_cps_width);
abld.MAD(float_pixel_y, float_pixel_y, brw_imm_f(0.5f), f_cps_height);
- this->pixel_z = abld.vgrf(BRW_REGISTER_TYPE_F);
- abld.MAD(this->pixel_z, z_c0, z_cx, float_pixel_x);
- abld.MAD(this->pixel_z, this->pixel_z, z_cy, float_pixel_y);
+ coarse_z = abld.vgrf(BRW_REGISTER_TYPE_F);
+ abld.MAD(coarse_z, z_c0, z_cx, float_pixel_x);
+ abld.MAD(coarse_z, coarse_z, z_cy, float_pixel_y);
}
if (wm_prog_data->uses_src_depth) {
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
}
+ if (wm_prog_data->uses_depth_w_coefficients ||
+ wm_prog_data->uses_src_depth) {
+ fs_reg sample_z = this->pixel_z;
+
+ switch (wm_prog_data->coarse_pixel_dispatch) {
+ case BRW_NEVER:
+ assert(wm_prog_data->uses_src_depth);
+ assert(!wm_prog_data->uses_depth_w_coefficients);
+ this->pixel_z = sample_z;
+ break;
+
+ case BRW_SOMETIMES:
+ assert(wm_prog_data->uses_src_depth);
+ assert(wm_prog_data->uses_depth_w_coefficients);
+ this->pixel_z = abld.vgrf(BRW_REGISTER_TYPE_F);
+
+ /* We re-use the check_dynamic_msaa_flag() call from above */
+ abld.SEL(this->pixel_z, coarse_z, sample_z);
+ break;
+
+ case BRW_ALWAYS:
+ assert(!wm_prog_data->uses_src_depth);
+ assert(wm_prog_data->uses_depth_w_coefficients);
+ this->pixel_z = coarse_z;
+ break;
+ }
+ }
+
if (wm_prog_data->uses_src_w) {
abld = bld.annotate("compute pos.w");
this->pixel_w = fetch_payload_reg(abld, fs_payload().source_w_reg);
inst->desc =
(inst->group / 16) << 11 | /* rt slot group */
brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
- prog_data->per_coarse_pixel_dispatch);
+ 0 /* coarse_write */);
+
+ fs_reg desc = brw_imm_ud(0);
+ if (prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
+ inst->desc |= (1 << 18);
+ } else if (prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
+ STATIC_ASSERT(BRW_WM_MSAA_FLAG_COARSE_DISPATCH == (1 << 18));
+ const fs_builder &ubld = bld.exec_all().group(8, 0);
+ desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld.AND(desc, dynamic_msaa_flags(prog_data),
+ brw_imm_ud(BRW_WM_MSAA_FLAG_COARSE_DISPATCH));
+ }
uint32_t ex_desc = 0;
if (devinfo->ver >= 11) {
inst->opcode = SHADER_OPCODE_SEND;
inst->resize_sources(3);
inst->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
- inst->src[0] = brw_imm_ud(0);
+ inst->src[0] = desc;
inst->src[1] = brw_imm_ud(0);
inst->src[2] = payload;
inst->mlen = regs_written(load);
fs_reg payload = brw_vec8_grf(0, 0);
unsigned mlen = 1;
- const fs_reg desc = inst->src[1];
-
unsigned mode;
switch (inst->opcode) {
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
unreachable("Invalid interpolator instruction");
}
+ fs_reg desc = inst->src[1];
uint32_t desc_imm =
brw_pixel_interp_desc(devinfo, mode, inst->pi_noperspective,
- wm_prog_data->per_coarse_pixel_dispatch,
+ false /* coarse_pixel_rate */,
inst->exec_size, inst->group);
+ if (wm_prog_data->coarse_pixel_dispatch == BRW_ALWAYS) {
+ desc_imm |= (1 << 15);
+ } else if (wm_prog_data->coarse_pixel_dispatch == BRW_SOMETIMES) {
+ fs_reg orig_desc = desc;
+ const fs_builder &ubld = bld.exec_all().group(8, 0);
+ desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld.AND(desc, dynamic_msaa_flags(wm_prog_data),
+ brw_imm_ud(BRW_WM_MSAA_FLAG_COARSE_DISPATCH));
+
+ /* The uniform is in bit 18 but we need it in bit 15 */
+ STATIC_ASSERT(BRW_WM_MSAA_FLAG_COARSE_DISPATCH == (1 << 18));
+ ubld.SHR(desc, desc, brw_imm_ud(3));
+
+ /* And, if it's AT_OFFSET, we might have a non-trivial descriptor */
+ if (orig_desc.file == IMM) {
+ desc_imm |= orig_desc.ud;
+ } else {
+ ubld.OR(desc, desc, orig_desc);
+ }
+ }
+
assert(bld.shader->devinfo->ver >= 7);
inst->opcode = SHADER_OPCODE_SEND;
inst->sfid = GFX7_SFID_PIXEL_INTERPOLATOR;
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
- ms != NULL ? ms->rasterization_samples : 1);
+ ms != NULL ? ms->rasterization_samples : 1,
+ 0 /* msaa_flags */);
ps.KernelStartPointer0 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
ps.PixelShaderValid = true;
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
- ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+ ps.PixelShaderIsPerSample =
+ brw_wm_prog_data_is_persample(wm_prog_data, 0);
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */
if (!wm_prog_data->uses_sample_mask)
ps.InputCoverageMaskState = ICMS_NONE;
- else if (wm_prog_data->per_coarse_pixel_dispatch)
+ else if (brw_wm_prog_data_is_coarse(wm_prog_data, 0))
ps.InputCoverageMaskState = ICMS_NORMAL;
else if (wm_prog_data->post_depth_coverage)
ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
#if GFX_VER >= 11
ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
wm_prog_data->uses_depth_w_coefficients;
- ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch;
+ ps.PixelShaderIsPerCoarsePixel = wm_prog_data->coarse_pixel_dispatch;
#endif
#if GFX_VERx10 >= 125
/* TODO: We should only require this when the last geometry shader uses
* a fragment shading rate that is not constant.
*/
- ps.EnablePSDependencyOnCPsizeChange = wm_prog_data->per_coarse_pixel_dispatch;
+ ps.EnablePSDependencyOnCPsizeChange =
+ brw_wm_prog_data_is_coarse(wm_prog_data, 0);
#endif
}
}
const struct vk_fragment_shading_rate_state *fsr)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
- const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch;
+ const bool cps_enable = wm_prog_data &&
+ brw_wm_prog_data_is_coarse(wm_prog_data, 0);
#if GFX_VER == 11
anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
wm.PixelShaderKillsPixel;
if (ms != NULL && ms->rasterization_samples > 1) {
- if (wm_prog_data->persample_dispatch) {
+ if (brw_wm_prog_data_is_persample(wm_prog_data, 0)) {
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
} else {
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
- ms != NULL ? ms->rasterization_samples : 1);
+ ms != NULL ? ms->rasterization_samples : 1,
+ 0 /* msaa_flags */);
ps.KernelStartPointer0 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
ps.PixelShaderValid = true;
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
- ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+ ps.PixelShaderIsPerSample =
+ brw_wm_prog_data_is_persample(wm_prog_data, 0);
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;