radv: start allocating user sgprs
author Dave Airlie <airlied@redhat.com>
Mon, 17 Apr 2017 20:18:01 +0000 (06:18 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 18 Apr 2017 23:00:43 +0000 (09:00 +1000)
This adds an initial implementation to allocate the user
sgprs and make sure we don't run out if we try to bind
a bunch of descriptor sets.

This can be enhanced further in the future if we add
support for inlining push constants.
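
For context, here is a minimal standalone sketch of the budgeting idea
(illustrative only: MAX_USER_SGPRS and the helper name are made up for
this sketch; the real accounting is allocate_user_sgprs() in the diff
below, which assumes a 16 user-SGPR budget and charges 2 SGPRs per
bound descriptor set):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_USER_SGPRS 16 /* budget assumed by the patch */

    /* Sketch: given the user SGPRs a stage has already claimed,
     * check whether the used descriptor sets (2 SGPRs each in the
     * patch) still fit in the budget. */
    static bool user_sgprs_fit(uint8_t sgpr_count, unsigned num_desc_sets)
    {
            uint32_t remaining = MAX_USER_SGPRS - sgpr_count;
            return remaining / 2 >= num_desc_sets;
    }

    int main(void)
    {
            /* e.g. 2 (scratch/rings) + 2 (vertex buffers)
             * + 3 (draw parameters incl. draw id)
             * + 2 (push constants) = 9 */
            uint8_t used = 9;
            printf("3 sets fit: %d\n", user_sgprs_fit(used, 3)); /* 1 */
            printf("4 sets fit: %d\n", user_sgprs_fit(used, 4)); /* 0 */
            return 0;
    }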

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c

index 1ca27d5..31c3a6c 100644
@@ -556,16 +556,15 @@ static void declare_tess_lds(struct nir_to_llvm_context *ctx)
                "tess_lds");
 }
 
-static void create_function(struct nir_to_llvm_context *ctx)
+struct user_sgpr_info {
+       bool need_ring_offsets;
+       uint8_t sgpr_count;
+};
+
+static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
+                               struct user_sgpr_info *user_sgpr_info)
 {
-       LLVMTypeRef arg_types[23];
-       unsigned arg_idx = 0;
-       unsigned array_params_mask = 0;
-       unsigned sgpr_count = 0, user_sgpr_count;
-       unsigned i;
-       unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
-       unsigned user_sgpr_idx;
-       bool need_ring_offsets = false;
+       memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
 
        /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
        if (ctx->stage == MESA_SHADER_GEOMETRY ||
@@ -573,13 +572,75 @@ static void create_function(struct nir_to_llvm_context *ctx)
            ctx->stage == MESA_SHADER_TESS_CTRL ||
            ctx->stage == MESA_SHADER_TESS_EVAL ||
            ctx->is_gs_copy_shader)
-               need_ring_offsets = true;
+               user_sgpr_info->need_ring_offsets = true;
 
        if (ctx->stage == MESA_SHADER_FRAGMENT &&
            ctx->shader_info->info.ps.needs_sample_positions)
-               need_ring_offsets = true;
+               user_sgpr_info->need_ring_offsets = true;
+
+       /* 2 user sgprs will nearly always be allocated for scratch/rings */
+       if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
+               user_sgpr_info->sgpr_count += 2;
+       }
+
+       switch (ctx->stage) {
+       case MESA_SHADER_COMPUTE:
+               user_sgpr_info->sgpr_count += ctx->shader_info->info.cs.grid_components_used;
+               break;
+       case MESA_SHADER_FRAGMENT:
+               user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
+               break;
+       case MESA_SHADER_VERTEX:
+               if (!ctx->is_gs_copy_shader) {
+                       user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
+                       if (ctx->shader_info->info.vs.needs_draw_id) {
+                               user_sgpr_info->sgpr_count += 3;
+                       } else {
+                               user_sgpr_info->sgpr_count += 2;
+                       }
+               }
+               if (ctx->options->key.vs.as_ls)
+                       user_sgpr_info->sgpr_count++;
+               break;
+       case MESA_SHADER_TESS_CTRL:
+               user_sgpr_info->sgpr_count += 4;
+               break;
+       case MESA_SHADER_TESS_EVAL:
+               user_sgpr_info->sgpr_count += 1;
+               break;
+       case MESA_SHADER_GEOMETRY:
+               user_sgpr_info->sgpr_count += 2;
+               break;
+       default:
+               break;
+       }
+
+       if (ctx->shader_info->info.needs_push_constants)
+               user_sgpr_info->sgpr_count += 2;
+
+       uint32_t remaining_sgprs = 16 - user_sgpr_info->sgpr_count;
+       if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
+               fprintf(stderr, "radv: TODO: add support for indirect sgprs\n");
+               /* need to add support for indirect descriptor sets */
+               assert(0);
+       } else {
+               user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
+       }
+}
+
+static void create_function(struct nir_to_llvm_context *ctx)
+{
+       LLVMTypeRef arg_types[23];
+       unsigned arg_idx = 0;
+       unsigned array_params_mask = 0;
+       unsigned sgpr_count = 0, user_sgpr_count;
+       unsigned i;
+       unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
+       unsigned user_sgpr_idx;
+       struct user_sgpr_info user_sgpr_info;
 
-       if (need_ring_offsets && !ctx->options->supports_spill) {
+       allocate_user_sgprs(ctx, &user_sgpr_info);
+       if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
                arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* address of rings */
        }
 
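(Worked example of the accounting above: a vertex shader with vertex
buffers (2) and a draw id (3), plus push constants (2) and spill/ring
offsets (2), claims 9 of the 16 user SGPRs; at 2 SGPRs per descriptor
set, that leaves room for (16 - 9) / 2 = 3 sets before the
still-unimplemented indirect path hits the assert.)
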
@@ -727,7 +788,7 @@ static void create_function(struct nir_to_llvm_context *ctx)
        arg_idx = 0;
        user_sgpr_idx = 0;
 
-       if (ctx->options->supports_spill || need_ring_offsets) {
+       if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
                set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, user_sgpr_idx, 2);
                user_sgpr_idx += 2;
                if (ctx->options->supports_spill) {