* needed. However, we keep a large stack size to avoid it being put into registers, which hurts
* occupancy. */
#define MAX_SCRATCH_STACK_ENTRY_COUNT 76
-#define MAX_SHARED_STACK_ENTRY_COUNT 8
typedef struct {
nir_variable *variable;
rq_variable *stack;
uint32_t shared_base;
+ uint32_t stack_entries;
};
#define VAR_NAME(name) \
uint32_t workgroup_size = shader->info.workgroup_size[0] * shader->info.workgroup_size[1] *
shader->info.workgroup_size[2];
- uint32_t shared_stack_size = workgroup_size * MAX_SHARED_STACK_ENTRY_COUNT * 4;
+ uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 16 : 8;
+ uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
uint32_t shared_offset = align(shader->info.shared_size, 4);
if (shader->info.stage != MESA_SHADER_COMPUTE || array_length > 1 ||
shared_offset + shared_stack_size > max_shared_size) {
dst->stack = rq_variable_create(
dst, shader, array_length,
glsl_array_type(glsl_uint_type(), MAX_SCRATCH_STACK_ENTRY_COUNT, 0), VAR_NAME("_stack"));
+ dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
} else {
dst->stack = NULL;
dst->shared_base = shared_offset;
+ dst->stack_entries = shared_stack_entries;
+
shader->info.shared_size = shared_offset + shared_stack_size;
}
}
.tmin = rq_load_var(b, index, vars->tmin),
.dir = rq_load_var(b, index, vars->direction),
.vars = trav_vars,
+ .stack_entries = vars->stack_entries,
.stack_store_cb = store_stack_entry,
.stack_load_cb = load_stack_entry,
.aabb_cb = handle_candidate_aabb,
if (vars->stack) {
args.stack_stride = 1;
- args.stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
args.stack_base = 0;
} else {
uint32_t workgroup_size = b->shader->info.workgroup_size[0] *
b->shader->info.workgroup_size[1] *
b->shader->info.workgroup_size[2];
args.stack_stride = workgroup_size * 4;
- args.stack_entries = MAX_SHARED_STACK_ENTRY_COUNT;
args.stack_base = vars->shared_base;
}