radv: Implement VK_EXT_vertex_attribute_divisor.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sun, 8 Apr 2018 08:15:21 +0000 (10:15 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 12 Apr 2018 20:57:23 +0000 (22:57 +0200)
Pretty straightforward: just pass the divisors through the shader
key and then do an LLVM divide.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_extensions.py
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_shader.h

index d912fac..a4a0ea6 100644 (file)
@@ -1002,6 +1002,12 @@ void radv_GetPhysicalDeviceProperties2(
                        properties->vgprAllocationGranularity = 4;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
+                       VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
+                               (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
+                       properties->maxVertexAttribDivisor = UINT32_MAX;
+                       break;
+               }
                default:
                        break;
                }
index a680f42..db37d61 100644 (file)
@@ -93,6 +93,7 @@ EXTENSIONS = [
     Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
     Extension('VK_EXT_sampler_filter_minmax',             1, 'device->rad_info.chip_class >= CIK'),
     Extension('VK_EXT_shader_viewport_index_layer',       1, True),
+    Extension('VK_EXT_vertex_attribute_divisor',          1, True),
     Extension('VK_AMD_draw_indirect_count',               1, True),
     Extension('VK_AMD_gcn_shader',                        1, True),
     Extension('VK_AMD_rasterization_order',               1, 'device->has_out_of_order_rast'),
index 2f0864d..a6b48e2 100644 (file)
@@ -1794,14 +1794,26 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 
        for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
                if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
-                       buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
-                                                   ctx->abi.start_instance, "");
-                       if (ctx->options->key.vs.as_ls) {
-                               ctx->shader_info->vs.vgpr_comp_cnt =
-                                       MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+                       uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[index + i];
+
+                       if (divisor) {
+                               buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
+                                                           ctx->abi.start_instance, "");
+
+                               if (divisor != 1) {
+                                       buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+                                                                    LLVMConstInt(ctx->ac.i32, divisor, 0), "");
+                               }
+
+                               if (ctx->options->key.vs.as_ls) {
+                                       ctx->shader_info->vs.vgpr_comp_cnt =
+                                               MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+                               } else {
+                                       ctx->shader_info->vs.vgpr_comp_cnt =
+                                               MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+                               }
                        } else {
-                               ctx->shader_info->vs.vgpr_comp_cnt =
-                                       MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+                               buffer_index = ctx->ac.i32_0;
                        }
                } else
                        buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
index 6735b36..fce3201 100644 (file)
@@ -1743,22 +1743,38 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
 {
        const VkPipelineVertexInputStateCreateInfo *input_state =
                                                 pCreateInfo->pVertexInputState;
+       const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state =
+               vk_find_struct_const(input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
+
        struct radv_pipeline_key key;
        memset(&key, 0, sizeof(key));
 
        key.has_multiview_view_index = has_view_index;
 
        uint32_t binding_input_rate = 0;
+       uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
        for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) {
-               if (input_state->pVertexBindingDescriptions[i].inputRate)
-                       binding_input_rate |= 1u << input_state->pVertexBindingDescriptions[i].binding;
+               if (input_state->pVertexBindingDescriptions[i].inputRate) {
+                       unsigned binding = input_state->pVertexBindingDescriptions[i].binding;
+                       binding_input_rate |= 1u << binding;
+                       instance_rate_divisors[binding] = 1;
+               }
+       }
+       if (divisor_state) {
+               for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) {
+                       instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] =
+                               divisor_state->pVertexBindingDivisors[i].divisor;
+               }
        }
 
        for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
                unsigned binding;
                binding = input_state->pVertexAttributeDescriptions[i].binding;
-               if (binding_input_rate & (1u << binding))
-                       key.instance_rate_inputs |= 1u << input_state->pVertexAttributeDescriptions[i].location;
+               if (binding_input_rate & (1u << binding)) {
+                       unsigned location = input_state->pVertexAttributeDescriptions[i].location;
+                       key.instance_rate_inputs |= 1u << location;
+                       key.instance_rate_divisors[location] = instance_rate_divisors[binding];
+               }
        }
 
        if (pCreateInfo->pTessellationState)
@@ -1787,6 +1803,8 @@ radv_fill_shader_keys(struct radv_shader_variant_key *keys,
                       nir_shader **nir)
 {
        keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
+       for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i)
+               keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
 
        if (nir[MESA_SHADER_TESS_CTRL]) {
                keys[MESA_SHADER_VERTEX].vs.as_ls = true;
index 1bcc3a9..fb94c50 100644 (file)
@@ -347,6 +347,7 @@ struct radv_pipeline_cache {
 
 struct radv_pipeline_key {
        uint32_t instance_rate_inputs;
+       uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
        unsigned tess_input_vertices;
        uint32_t col_format;
        uint32_t is_int8;
index cbb7394..6588b78 100644 (file)
@@ -57,6 +57,7 @@ struct radv_shader_module {
 
 struct radv_vs_variant_key {
        uint32_t instance_rate_inputs;
+       uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
        uint32_t as_es:1;
        uint32_t as_ls:1;
        uint32_t export_prim_id:1;