key->flat_shade = rast->cso.flatshade &&
(info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1));
- key->persample_interp = rast->cso.force_persample_interp;
key->multisample_fbo = rast->cso.multisample && fb->samples > 1;
+ key->persample_interp =
+ rast->cso.force_persample_interp ? BRW_ALWAYS : BRW_NEVER;
key->ignore_sample_mask_out = !key->multisample_fbo;
key->coherent_fb_fetch = false; // TODO: needed?
.alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
.alpha_to_coverage = key->alpha_to_coverage,
.clamp_fragment_color = key->clamp_fragment_color,
- .persample_interp = key->persample_interp,
+ .persample_interp = key->persample_interp ? BRW_ALWAYS : BRW_NEVER,
.multisample_fbo = key->multisample_fbo,
.force_dual_color_blend = key->force_dual_color_blend,
.coherent_fb_fetch = key->coherent_fb_fetch,
wm.StatisticsEnable = ice->state.statistics_counters_enabled;
wm.BarycentricInterpolationMode =
- wm_prog_data->barycentric_interp_modes;
+ wm_prog_data_barycentric_modes(wm_prog_data, 0);
if (wm_prog_data->early_fragment_tests)
wm.EarlyDepthStencilControl = EDSC_PREPS;
* us to run per-sample. Even when running per-sample due to gl_SampleID,
* we may still interpolate unqualified inputs at the pixel center.
*/
- bool persample_interp:1;
+ enum brw_sometimes persample_interp:2;
bool multisample_fbo:1;
enum brw_sometimes line_aa:2;
bool ignore_sample_mask_out:1;
bool coarse_pixel:1;
- uint64_t padding:58;
+ uint64_t padding:57;
};
struct brw_cs_prog_key {
BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE = 5,
BRW_BARYCENTRIC_MODE_COUNT = 6
};
+/** Bitmask with one bit set for each perspective barycentric mode
+ * (PIXEL, CENTROID and SAMPLE), indexed by the enum values above.
+ */
+#define BRW_BARYCENTRIC_PERSPECTIVE_BITS \
+ ((1 << BRW_BARYCENTRIC_PERSPECTIVE_PIXEL) | \
+ (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID) | \
+ (1 << BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE))
#define BRW_BARYCENTRIC_NONPERSPECTIVE_BITS \
((1 << BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL) | \
(1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
/** True if this shader has been dispatched per-sample */
BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),
+ /** True if inputs should be interpolated per-sample by default */
+ BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3),
+
/** True if this shader has been dispatched coarse
*
* This is intentionally chosen to be bit 18 to correspond to the coarse
return prog_data->persample_dispatch;
}
+/**
+ * Compute the barycentric interpolation mode bitfield for a fragment shader.
+ *
+ * Starts from prog_data->barycentric_interp_modes. When pushed_msaa_flags
+ * does not contain BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP the value is returned
+ * unchanged. When it does, each of the perspective and non-perspective
+ * groups that has at least one mode requested but no SAMPLE mode gets its
+ * highest requested bit cleared and the group's SAMPLE bit set instead, so
+ * the number of bits set in each group is preserved.
+ *
+ * \param prog_data          Compiled WM program data to read the modes from.
+ * \param pushed_msaa_flags  Dynamic MSAA flags; if PERSAMPLE_INTERP is set,
+ *                           ENABLE_DYNAMIC must be set as well and the
+ *                           shader must be dispatched per-sample (both are
+ *                           asserted).
+ * \return Bitfield of BRW_BARYCENTRIC_* mode bits.
+ */
+static inline uint32_t
+wm_prog_data_barycentric_modes(const struct brw_wm_prog_data *prog_data,
+ enum brw_wm_msaa_flags pushed_msaa_flags)
+{
+ uint32_t modes = prog_data->barycentric_interp_modes;
+
+ if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP) {
+ assert(pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC);
+
+ assert(prog_data->persample_dispatch == BRW_ALWAYS ||
+ (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH));
+
+ /* Making dynamic per-sample interpolation work is a bit tricky. The
+ * hardware will hang if SAMPLE is requested but per-sample dispatch is
+ * not enabled. This means we can't preemptively add SAMPLE to the
+ * barycentrics bitfield. Instead, we have to add it late and only
+ * on-demand. Annoyingly, changing the number of barycentrics requested
+ * changes the whole PS shader payload so we very much don't want to do
+ * that. Instead, if the dynamic per-sample interpolation flag is set,
+ * we check to see if SAMPLE was requested and, if not, replace the
+ * highest barycentric bit in the [non]perspective grouping (CENTROID,
+ * if it exists, else PIXEL) with SAMPLE. The shader will stomp all the
+ * barycentrics in the shader with SAMPLE so it really doesn't matter
+ * which one we replace. The important thing is that we keep the number
+ * of barycentrics in each [non]perspective grouping the same.
+ */
+ /* Perspective group: swap its highest requested mode for SAMPLE. */
+ if ((modes & BRW_BARYCENTRIC_PERSPECTIVE_BITS) &&
+ !(modes & BITFIELD_BIT(BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE))) {
+ int sample_mode =
+ util_last_bit(modes & BRW_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
+ assert(modes & BITFIELD_BIT(sample_mode));
+
+ modes &= ~BITFIELD_BIT(sample_mode);
+ modes |= BITFIELD_BIT(BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE);
+ }
+
+ /* Non-perspective group: same replacement as above. */
+ if ((modes & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) &&
+ !(modes & BITFIELD_BIT(BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))) {
+ int sample_mode =
+ util_last_bit(modes & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
+ assert(modes & BITFIELD_BIT(sample_mode));
+
+ modes &= ~BITFIELD_BIT(sample_mode);
+ modes |= BITFIELD_BIT(BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE);
+ }
+ }
+
+ return modes;
+}
+
static inline bool
brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
enum brw_wm_msaa_flags pushed_msaa_flags)
shader->info.fs.uses_sample_shading ||
shader->info.outputs_read;
- prog_data->persample_dispatch = BRW_NEVER;
- if (key->multisample_fbo &&
- (key->persample_interp || prog_data->sample_shading))
+ assert(key->multisample_fbo || key->persample_interp == BRW_NEVER);
+
+ prog_data->persample_dispatch = key->persample_interp;
+ if (key->multisample_fbo && prog_data->sample_shading)
prog_data->persample_dispatch = BRW_ALWAYS;
if (devinfo->ver >= 6) {
this->pixel_x = vgrf(glsl_type::float_type);
this->pixel_y = vgrf(glsl_type::float_type);
+ const struct brw_wm_prog_key *wm_key = (brw_wm_prog_key*) this->key;
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
switch (wm_prog_data->coarse_pixel_dispatch) {
case BRW_NEVER:
-#define COPY_OFFSET_REG(prefix, suffix) \
- prefix##_pixel_##suffix = prefix##_sample_##suffix;
-
- COPY_OFFSET_REG(int, offset_x)
- COPY_OFFSET_REG(int, offset_y)
- COPY_OFFSET_REG(int, offset_xy)
- COPY_OFFSET_REG(half_int, offset_x)
- COPY_OFFSET_REG(half_int, offset_y)
-
-#undef COPY_OFFSET_REG
+ int_pixel_offset_x = int_sample_offset_x;
+ int_pixel_offset_y = int_sample_offset_y;
+ int_pixel_offset_xy = int_sample_offset_xy;
+ half_int_pixel_offset_x = half_int_sample_offset_x;
+ half_int_pixel_offset_y = half_int_sample_offset_y;
break;
- case BRW_SOMETIMES:
- check_dynamic_msaa_flag(bld, wm_prog_data,
- BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
-
-#define COPY_OFFSET_REG(prefix, suffix) \
- prefix##_pixel_##suffix = bld.vgrf(BRW_REGISTER_TYPE_UW); \
- bld.SEL(prefix##_pixel_##suffix, \
- prefix##_coarse_##suffix, \
- prefix##_pixel_##suffix); \
+ case BRW_SOMETIMES: {
+ const fs_builder dbld =
+ abld.exec_all().group(MIN2(16, dispatch_width) * 2, 0);
- COPY_OFFSET_REG(int, offset_x)
- COPY_OFFSET_REG(int, offset_y)
- COPY_OFFSET_REG(int, offset_xy)
- COPY_OFFSET_REG(half_int, offset_x)
- COPY_OFFSET_REG(half_int, offset_y)
+ check_dynamic_msaa_flag(dbld, wm_prog_data,
+ BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
-#undef COPY_OFFSET_REG
+ int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ set_predicate(BRW_PREDICATE_NORMAL,
+ dbld.SEL(int_pixel_offset_x,
+ int_coarse_offset_x,
+ int_sample_offset_x));
+
+ int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ set_predicate(BRW_PREDICATE_NORMAL,
+ dbld.SEL(int_pixel_offset_y,
+ int_coarse_offset_y,
+ int_sample_offset_y));
+
+ int_pixel_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+ set_predicate(BRW_PREDICATE_NORMAL,
+ dbld.SEL(int_pixel_offset_xy,
+ int_coarse_offset_xy,
+ int_sample_offset_xy));
+
+ half_int_pixel_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
+ set_predicate(BRW_PREDICATE_NORMAL,
+ bld.SEL(half_int_pixel_offset_x,
+ half_int_coarse_offset_x,
+ half_int_sample_offset_x));
+
+ half_int_pixel_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
+ set_predicate(BRW_PREDICATE_NORMAL,
+ bld.SEL(half_int_pixel_offset_y,
+ half_int_coarse_offset_y,
+ half_int_sample_offset_y));
break;
+ }
case BRW_ALWAYS:
-#define COPY_OFFSET_REG(prefix, suffix) \
- prefix##_pixel_##suffix = prefix##_coarse_##suffix;
-
- COPY_OFFSET_REG(int, offset_x)
- COPY_OFFSET_REG(int, offset_y)
- COPY_OFFSET_REG(int, offset_xy)
- COPY_OFFSET_REG(half_int, offset_x)
- COPY_OFFSET_REG(half_int, offset_y)
-
-#undef COPY_OFFSET_REG
+ int_pixel_offset_x = int_coarse_offset_x;
+ int_pixel_offset_y = int_coarse_offset_y;
+ int_pixel_offset_xy = int_coarse_offset_xy;
+ half_int_pixel_offset_x = half_int_coarse_offset_x;
+ half_int_pixel_offset_y = half_int_coarse_offset_y;
break;
}
abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
}
+ if (wm_key->persample_interp == BRW_SOMETIMES) {
+ assert(!devinfo->needs_unlit_centroid_workaround);
+
+ const fs_builder ubld = bld.exec_all().group(16, 0);
+ bool loaded_flag = false;
+
+ for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
+ if (!(wm_prog_data->barycentric_interp_modes & BITFIELD_BIT(i)))
+ continue;
+
+ /* The sample mode will always be the top bit set in the perspective
+ * or non-perspective section. In the case where no SAMPLE mode was
+ * requested, wm_prog_data_barycentric_modes() will swap out the top
+ * mode for SAMPLE so this works regardless of whether SAMPLE was
+ * requested or not.
+ */
+ int sample_mode;
+ if (BITFIELD_BIT(i) & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) {
+ sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
+ BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
+ } else {
+ sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
+ BRW_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
+ }
+ assert(wm_prog_data->barycentric_interp_modes &
+ BITFIELD_BIT(sample_mode));
+
+ if (i == sample_mode)
+ continue;
+
+ uint8_t *barys = fs_payload().barycentric_coord_reg[i];
+
+ uint8_t *sample_barys = fs_payload().barycentric_coord_reg[sample_mode];
+ assert(barys[0] && sample_barys[0]);
+
+ if (!loaded_flag) {
+ check_dynamic_msaa_flag(ubld, wm_prog_data,
+ BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP);
+ }
+
+ for (unsigned j = 0; j < dispatch_width / 8; j++) {
+ fs_inst *mov =
+ ubld.MOV(brw_vec8_grf(barys[j / 2] + (j % 2) * 2, 0),
+ brw_vec8_grf(sample_barys[j / 2] + (j % 2) * 2, 0));
+ mov->predicate = BRW_PREDICATE_NORMAL;
+ }
+ }
+ }
+
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
this->delta_xy[i] = fetch_barycentric_reg(
bld, fs_payload().barycentric_coord_reg[i]);
if (!key->multisample_fbo) {
nir_lower_single_sampled(nir);
- } else if (key->persample_interp) {
+ } else if (key->persample_interp == BRW_ALWAYS) {
nir_shader_instructions_pass(nir, lower_barycentric_per_sample,
nir_metadata_block_index |
nir_metadata_dominance,
* harmless to compute it and then let dead-code take care of it.
*/
if (ms->rasterization_samples > 1) {
- key->persample_interp = ms->sample_shading_enable &&
- (ms->min_sample_shading * ms->rasterization_samples) > 1;
+ key->persample_interp =
+ (ms->sample_shading_enable &&
+ (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
+ BRW_ALWAYS : BRW_NEVER;
key->multisample_fbo = true;
}
wm_prog_data->uses_kill;
wm.BarycentricInterpolationMode =
- wm_prog_data->barycentric_interp_modes;
+ wm_prog_data_barycentric_modes(wm_prog_data, 0);
}
GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
#if GFX_VER >= 11
ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
wm_prog_data->uses_depth_w_coefficients;
- ps.PixelShaderIsPerCoarsePixel = wm_prog_data->coarse_pixel_dispatch;
+ ps.PixelShaderIsPerCoarsePixel =
+ brw_wm_prog_data_is_coarse(wm_prog_data, 0);
#endif
#if GFX_VERx10 >= 125
/* TODO: We should only require this when the last geometry shader uses
* harmless to compute it and then let dead-code take care of it.
*/
if (ms->rasterization_samples > 1) {
- key->persample_interp = ms->sample_shading_enable &&
- (ms->min_sample_shading * ms->rasterization_samples) > 1;
+ key->persample_interp =
+ (ms->sample_shading_enable &&
+ (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
+ BRW_ALWAYS : BRW_NEVER;
key->multisample_fbo = true;
}
#endif
wm.BarycentricInterpolationMode =
- wm_prog_data->barycentric_interp_modes;
+ wm_prog_data_barycentric_modes(wm_prog_data, 0);
#if GFX_VER < 8
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;