unsigned last_num_draw_calls;
unsigned flags; /* flush flags */
- /* Atoms (direct states). */
+ /* Atoms (state emit functions). */
union si_state_atoms atoms;
- unsigned dirty_atoms; /* mask */
- /* PM4 states (precomputed immutable states) */
- unsigned dirty_states;
+ uint64_t dirty_atoms; /* mask of dirty atoms and dirty PM4 states */
union si_state queued;
union si_state emitted;
/* Gfx11+: Buffered SH registers for SET_SH_REG_PAIRS_PACKED*. */
return 2048 + sctx->num_cs_dw_queries_suspend + num_draws * 10;
}
-static inline unsigned si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
+static inline uint64_t si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
{
- return 1 << (atom - sctx->atoms.array);
+ return 1ull << (atom - sctx->atoms.array);
}
static inline void si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
{
- unsigned bit = si_get_atom_bit(sctx, atom);
+ uint64_t bit = si_get_atom_bit(sctx, atom);
if (dirty)
sctx->dirty_atoms |= bit;
if (sctx->queued.array[idx] == state) {
sctx->queued.array[idx] = NULL;
- sctx->dirty_states &= ~BITFIELD_BIT(idx);
+ sctx->dirty_atoms &= ~BITFIELD64_BIT(idx);
}
}
for (unsigned i = 0; i < SI_NUM_STATES; i++) {
if (sctx->queued.array[i])
- sctx->dirty_states |= BITFIELD_BIT(i);
+ sctx->dirty_atoms |= BITFIELD64_BIT(i);
}
}
void si_init_state_functions(struct si_context *sctx)
{
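+ /* All PM4 states share the generic PM4 emit callback; the atom index passed to
+  * emit() identifies which queued state to emit. */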
+ for (unsigned i = 0; i < ARRAY_SIZE(sctx->atoms.s.pm4_states); i++)
+ sctx->atoms.s.pm4_states[i].emit = si_pm4_emit_state;
+
sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
};
#define SI_STATE_IDX(name) (offsetof(union si_state, named.name) / sizeof(struct si_pm4_state *))
-#define SI_STATE_BIT(name) (1 << SI_STATE_IDX(name))
+#define SI_STATE_BIT(name) (1ull << SI_STATE_IDX(name))
#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
-static inline unsigned si_states_that_always_roll_context(void)
-{
- return (SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) |
- SI_STATE_BIT(poly_offset));
-}
-
union si_state_atoms {
struct si_atoms_s {
- /* The order matters. */
+ /* This must be first: SI_STATE_BIT() indices must land on pm4_states entries so that PM4 states and atoms can share one dirty_atoms mask. */
+ struct si_atom pm4_states[SI_NUM_STATES];
struct si_atom render_cond;
struct si_atom streamout_begin;
struct si_atom streamout_enable; /* must be after streamout_begin */
struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)];
};
-#define SI_ATOM_BIT(name) (1 << (offsetof(union si_state_atoms, s.name) / sizeof(struct si_atom)))
+#define SI_ATOM_BIT(name) (1ull << (offsetof(union si_state_atoms, s.name) / sizeof(struct si_atom)))
#define SI_NUM_ATOMS (sizeof(union si_state_atoms) / sizeof(struct si_atom))
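+ /* With pm4_states at indices 0..SI_NUM_STATES-1 and the remaining atoms after it,
+  * a single 64-bit dirty_atoms mask covers both kinds of state, e.g.:
+  *    sctx->dirty_atoms |= SI_STATE_BIT(blend) | SI_ATOM_BIT(framebuffer);
+  */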
-static inline unsigned si_atoms_that_always_roll_context(void)
+static inline uint64_t si_atoms_that_always_roll_context(void)
{
- return (SI_ATOM_BIT(streamout_begin) | SI_ATOM_BIT(streamout_enable) | SI_ATOM_BIT(framebuffer) |
- SI_ATOM_BIT(sample_locations) | SI_ATOM_BIT(sample_mask) | SI_ATOM_BIT(blend_color) |
- SI_ATOM_BIT(clip_state) | SI_ATOM_BIT(scissors) | SI_ATOM_BIT(viewports) |
- SI_ATOM_BIT(stencil_ref) | SI_ATOM_BIT(scratch_state) | SI_ATOM_BIT(window_rectangles));
+ return SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) |
+ SI_STATE_BIT(poly_offset) |
+ SI_ATOM_BIT(streamout_begin) | SI_ATOM_BIT(streamout_enable) | SI_ATOM_BIT(framebuffer) |
+ SI_ATOM_BIT(sample_locations) | SI_ATOM_BIT(sample_mask) | SI_ATOM_BIT(blend_color) |
+ SI_ATOM_BIT(clip_state) | SI_ATOM_BIT(scissors) | SI_ATOM_BIT(viewports) |
+ SI_ATOM_BIT(stencil_ref) | SI_ATOM_BIT(scratch_state) | SI_ATOM_BIT(window_rectangles);
}
struct si_shader_data {
do { \
(sctx)->queued.named.member = (value); \
if (value && value != (sctx)->emitted.named.member) \
- (sctx)->dirty_states |= SI_STATE_BIT(member); \
+ (sctx)->dirty_atoms |= SI_STATE_BIT(member); \
else \
- (sctx)->dirty_states &= ~SI_STATE_BIT(member); \
+ (sctx)->dirty_atoms &= ~SI_STATE_BIT(member); \
} while (0)
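+ /* Usage sketch (assuming this is the si_pm4_bind_state macro): a CSO bind hook calls
+  * e.g. si_pm4_bind_state(sctx, blend, state), which queues the state and sets or
+  * clears SI_STATE_BIT(blend) in dirty_atoms depending on whether it differs from the
+  * already emitted state.
+  */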
/* si_descriptors.c */
}
ALWAYS_INLINE
-static void si_emit_all_states(struct si_context *sctx, unsigned skip_atom_mask)
+static void si_emit_all_states(struct si_context *sctx, uint64_t skip_atom_mask)
{
- /* Emit state atoms. */
- unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
- if (mask) {
- do {
- unsigned i = u_bit_scan(&mask);
- sctx->atoms.array[i].emit(sctx, i);
- } while (mask);
+ /* Emit states by calling their emit functions. */
+ uint64_t dirty = sctx->dirty_atoms & ~skip_atom_mask;
+ if (dirty) {
sctx->dirty_atoms &= skip_atom_mask;
- }
- /* Emit states. */
- mask = sctx->dirty_states;
- if (mask) {
- do {
- unsigned i = u_bit_scan(&mask);
- si_pm4_emit_state(sctx, i);
- } while (mask);
- sctx->dirty_states = 0;
+ /* u_bit_scan64 is too slow on i386. */
+ if (sizeof(void*) == 8) {
+ do {
+ unsigned i = u_bit_scan64(&dirty);
+ sctx->atoms.array[i].emit(sctx, i);
+ } while (dirty);
+ } else {
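+ /* 32-bit path: split the 64-bit mask and scan each half with 32-bit bit scans. */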
+ unsigned dirty_lo = dirty;
+ unsigned dirty_hi = dirty >> 32;
+ while (dirty_lo) {
+ unsigned i = u_bit_scan(&dirty_lo);
+ sctx->atoms.array[i].emit(sctx, i);
+ }
+ while (dirty_hi) {
+ unsigned i = 32 + u_bit_scan(&dirty_hi);
+ sctx->atoms.array[i].emit(sctx, i);
+ }
+ }
}
}
* It's better to draw before prefetches because we want to start fetching indices before
* shaders. The idea is to minimize the time when the CUs are idle.
*/
- unsigned masked_atoms = 0;
+ uint64_t masked_atoms = 0;
if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND)) {
/* The render condition state should be emitted after cache flushes. */
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
gfx9_scissor_bug = true;
if ((!IS_DRAW_VERTEX_STATE && indirect && indirect->count_from_stream_output) ||
- sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
- sctx->dirty_states & si_states_that_always_roll_context())
+ sctx->dirty_atoms & si_atoms_that_always_roll_context())
sctx->context_roll = true;
}