'nvk_cmd_buffer.c',
'nvk_cmd_buffer.h',
'nvk_cmd_copy.c',
+ 'nvk_cmd_dispatch.c',
+ 'nvk_compute_pipeline.c',
'nvk_descriptor_set.h',
'nvk_descriptor_set.c',
'nvk_descriptor_set_layout.c',
#include "nvk_descriptor_set.h"
#include "nvk_device.h"
+#include "nvk_pipeline.h"
#include "nvk_physical_device.h"
#include "nouveau_push.h"
+#include "nouveau_context.h"
+
+#include "nouveau/nouveau.h"
#include "nvk_cla0c0.h"
return nvk_reset_cmd_buffer(cmd_buffer);
}
-static uint64_t
-calc_tls_size(struct nvk_device *device,
- uint32_t lpos, uint32_t lneg, uint32_t cstack)
-{
- uint64_t size = (lpos + lneg) * 32 + cstack;
-
- assert (size < (1 << 20));
-
- size *= 64; /* max warps */
- size = align(size, 0x8000);
- size *= device->pdev->dev->mp_count;
-
- size = align(size, 1 << 17);
- return size;
-}
-
-static void
-nve4_begin_compute(struct nvk_cmd_buffer *cmd)
-{
- struct nvk_device *dev = (struct nvk_device *)cmd->vk.base.device;
-
- cmd->tls_space_needed = calc_tls_size(dev, 128 * 16, 0, 0x200);
-}
-
VKAPI_ATTR VkResult VKAPI_CALL
nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
else
cmd->reset_on_submit = false;
- struct nvk_device *dev = (struct nvk_device *)cmd->vk.base.device;
- struct nvk_physical_device *pdev = dev->pdev;
-
- /* this could be made optional for non-compute cmdbuffers */
- if (pdev->dev->cls >= 0xa0)
- nve4_begin_compute(cmd);
+ nvk_cmd_buffer_begin_compute(cmd, pBeginInfo);
return VK_SUCCESS;
}
const VkDependencyInfo *pDependencyInfo)
{ }
+/* Binds a pipeline to the command buffer.  Only the compute bind point is
+ * implemented so far; graphics hits the unreachable() below.
+ */
+VKAPI_ATTR void VKAPI_CALL
+nvk_CmdBindPipeline(VkCommandBuffer commandBuffer,
+                    VkPipelineBindPoint pipelineBindPoint,
+                    VkPipeline _pipeline)
+{
+   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
+   VK_FROM_HANDLE(nvk_pipeline, pipeline, _pipeline);
+
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      assert(pipeline->type == NVK_PIPELINE_COMPUTE);
+      /* Reference the shader BO (read-only) on this command buffer's push
+       * so it stays resident while the command buffer executes.
+       */
+      nouveau_ws_push_ref(cmd->push,
+                          pipeline->shaders[MESA_SHADER_COMPUTE].bo,
+                          NOUVEAU_WS_BO_RD);
+      cmd->state.cs.pipeline = (struct nvk_compute_pipeline *)pipeline;
+      break;
+   default:
+      unreachable("Unhandled bind point");
+   }
+}
VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
#include "nvk_private.h"
+#include "nouveau_push.h"
+
#include "vulkan/runtime/vk_command_buffer.h"
#include "vulkan/runtime/vk_command_pool.h"
/** Root descriptor table. This gets pushed to the GPU directly */
struct nvk_root_descriptor_table {
+   /* Per-bind-point dispatch parameters.  Only compute (cs) exists so far;
+    * the union leaves room for other stages' views of the root table.
+    */
+   union {
+      struct {
+         uint32_t block_size[3]; /* local workgroup size baked into the shader */
+         uint32_t grid_size[3];  /* dispatch size in workgroups (vkCmdDispatch args) */
+         uint32_t _pad[2];       /* pads cs to 32 bytes — presumably for vec4
+                                  * alignment of the push data below; TODO confirm
+                                  */
+      } cs;
+   };
+
/* Client push constants */
uint8_t push[128];
};
struct nvk_compute_state {
+   /* Pipeline last bound at VK_PIPELINE_BIND_POINT_COMPUTE; presumably NULL
+    * until the first vkCmdBindPipeline — TODO confirm reset path zeroes it.
+    */
+   struct nvk_compute_pipeline *pipeline;
struct nvk_descriptor_state descriptors;
};
};
VkResult nvk_reset_cmd_buffer(struct nvk_cmd_buffer *cmd_buffer);
+void nvk_cmd_buffer_begin_compute(struct nvk_cmd_buffer *cmd,
+ const VkCommandBufferBeginInfo *pBeginInfo);
+
VK_DEFINE_HANDLE_CASTS(nvk_cmd_buffer, vk.base, VkCommandBuffer,
VK_OBJECT_TYPE_COMMAND_BUFFER)
bool
nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd_buffer, unsigned size,
uint64_t *addr, void **ptr);
+
#endif
--- /dev/null
+#include "nvk_cmd_buffer.h"
+#include "nvk_descriptor_set.h"
+#include "nvk_device.h"
+#include "nvk_physical_device.h"
+#include "nvk_pipeline.h"
+
+#include "classes/cla0b5.h"
+
+#include "nvk_cla0c0.h"
+#include "cla1c0.h"
+#include "nvk_clc3c0.h"
+
+#include "drf.h"
+#include "cla0c0qmd.h"
+#include "clc0c0qmd.h"
+#include "clc3c0qmd.h"
+
+#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
+#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
+#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+
+/* Computes the thread-local storage (scratch) size to reserve for the device.
+ *
+ * lpos/lneg are the positive/negative per-thread local memory stack sizes and
+ * cstack the call stack size; the *32 presumably scales per-thread usage to a
+ * full warp (32 lanes) — TODO confirm.  The per-warp size is then multiplied
+ * by the maximum warp count, aligned, and multiplied by the number of
+ * multiprocessors so every warp on every MP has its own slice.
+ */
+static uint64_t
+calc_tls_size(struct nvk_device *device,
+              uint32_t lpos, uint32_t lneg, uint32_t cstack)
+{
+   uint64_t size = (lpos + lneg) * 32 + cstack;
+
+   /* Per-warp size must fit the hardware field; 1 MiB is the assumed limit. */
+   assert (size < (1 << 20));
+
+   size *= 64; /* max warps */
+   size = align(size, 0x8000);
+   size *= device->pdev->dev->mp_count;
+
+   /* Final allocation granularity: 128 KiB. */
+   size = align(size, 1 << 17);
+   return size;
+}
+
+/* Per-command-buffer compute setup, called from vkBeginCommandBuffer.
+ *
+ * Records how much TLS (scratch) space the command buffer will need so it can
+ * be bound at submit time.  The sizes passed (128 * 16 lmem, no negative
+ * stack, 0x200 call stack) are fixed worst-case values for now rather than
+ * derived from any shader.  pBeginInfo is currently unused.
+ */
+void
+nvk_cmd_buffer_begin_compute(struct nvk_cmd_buffer *cmd,
+                             const VkCommandBufferBeginInfo *pBeginInfo)
+{
+   struct nvk_device *dev = (struct nvk_device *)cmd->vk.base.device;
+   struct nvk_physical_device *pdev = dev->pdev;
+
+   /* Only implemented for Kepler (class 0xa0) and newer. */
+   if (pdev->dev->cls < 0xa0)
+      return;
+
+   cmd->tls_space_needed = calc_tls_size(dev, 128 * 16, 0, 0x200);
+}
+
+/* Writes the per-dispatch grid dimensions (in workgroups/CTAs) into a
+ * Volta+ (QMD v02_02) launch descriptor.  All other QMD fields come from the
+ * pipeline's pre-built template.
+ */
+static void
+gv100_compute_setup_launch_desc(uint32_t *qmd,
+                                uint32_t x, uint32_t y, uint32_t z)
+{
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
+}
+
+/* Binds a constant buffer slot in the launch descriptor.
+ *
+ * Uses the Pascal (QMDV02_01) field layout; NOTE(review): this is applied to
+ * a QMD otherwise built with Volta (QMDV02_02) setters — presumably the CB
+ * fields share the same bit layout across the two versions; TODO confirm
+ * against clc3c0qmd.h.  The size field is in 16-byte units.
+ */
+static inline void
+gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
+                            uint32_t size, uint64_t address)
+{
+   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
+   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
+   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
+                            DIV_ROUND_UP(size, 16));
+   NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
+}
+
+/* Records a compute dispatch:
+ *  1. fills in the root descriptor table's dispatch parameters,
+ *  2. uploads the root table to GPU memory via an inline LAUNCH_DMA copy,
+ *  3. builds a QMD from the pipeline's template and the grid size, and
+ *  4. kicks the dispatch with SEND_PCAS_A / SEND_SIGNALING_PCAS_B.
+ */
+VKAPI_ATTR void VKAPI_CALL
+nvk_CmdDispatch(VkCommandBuffer commandBuffer,
+                uint32_t groupCountX,
+                uint32_t groupCountY,
+                uint32_t groupCountZ)
+{
+   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
+   const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
+   const struct nvk_shader *shader =
+      &pipeline->base.shaders[MESA_SHADER_COMPUTE];
+   struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
+
+   /* Make the workgroup/grid sizes visible to the shader via the root table. */
+   desc->root.cs.block_size[0] = shader->cp.block_size[0];
+   desc->root.cs.block_size[1] = shader->cp.block_size[1];
+   desc->root.cs.block_size[2] = shader->cp.block_size[2];
+   desc->root.cs.grid_size[0] = groupCountX;
+   desc->root.cs.grid_size[1] = groupCountY;
+   desc->root.cs.grid_size[2] = groupCountZ;
+
+   uint32_t root_table_size = sizeof(desc->root);
+   void *root_table_map;
+   uint64_t root_table_addr;
+   /* NOTE(review): root_table_map is never written — the table is uploaded
+    * with the inline-data DMA below, not through the CPU mapping.  The
+    * allocation is only used for its GPU address.
+    */
+   if (!nvk_cmd_buffer_upload_alloc(cmd, root_table_size, &root_table_addr,
+                                    &root_table_map))
+      return; /* TODO: Error */
+
+   /* Program an inline pitch-linear copy of the root table to
+    * root_table_addr; the payload follows in the push buffer itself.
+    */
+   P_MTHD(cmd->push, NVA0C0, OFFSET_OUT_UPPER);
+   P_NVA0C0_OFFSET_OUT_UPPER(cmd->push, root_table_addr >> 32);
+   P_NVA0C0_OFFSET_OUT(cmd->push, root_table_addr & 0xffffffff);
+   P_MTHD(cmd->push, NVA0C0, LINE_LENGTH_IN);
+   P_NVA0C0_LINE_LENGTH_IN(cmd->push, root_table_size);
+   P_NVA0C0_LINE_COUNT(cmd->push, 0x1);
+
+   P_1INC(cmd->push, NVA0C0, LAUNCH_DMA);
+   P_NVA0C0_LAUNCH_DMA(cmd->push,
+                       { .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
+                         .sysmembar_disable = SYSMEMBAR_DISABLE_TRUE });
+   P_INLINE_ARRAY(cmd->push, (uint32_t *)&desc->root, root_table_size / 4);
+
+   uint32_t *qmd;
+   uint64_t qmd_addr;
+   /* The QMD itself is 256 bytes; 512 are allocated, presumably to guarantee
+    * the 256-byte alignment required by SEND_PCAS_A's addr >> 8 encoding —
+    * TODO confirm the allocator's alignment guarantees.
+    */
+   if (!nvk_cmd_buffer_upload_alloc(cmd, 512, &qmd_addr, (void **)&qmd))
+      return; /* TODO: Error */
+
+   memcpy(qmd, pipeline->qmd_template, 256);
+   gv100_compute_setup_launch_desc(qmd, groupCountX, groupCountY, groupCountZ);
+
+   /* Bind the root table as CBs 0 and 1.  NOTE(review): the size is a
+    * hard-coded 256 rather than root_table_size — mismatched if the root
+    * table ever grows past 256 bytes; TODO keep these in sync.
+    */
+   gp100_cp_launch_desc_set_cb(qmd, 0, 256, root_table_addr);
+   gp100_cp_launch_desc_set_cb(qmd, 1, 256, root_table_addr);
+
+   /* Invalidate constant caches so the freshly-written root table is seen. */
+   P_MTHD(cmd->push, NVA0C0, INVALIDATE_SHADER_CACHES_NO_WFI);
+   P_NVA0C0_INVALIDATE_SHADER_CACHES_NO_WFI(cmd->push, { .constant = CONSTANT_TRUE });
+
+   /* Hand the QMD (256-byte-aligned address, encoded >> 8) to the compute
+    * front end and schedule it.
+    */
+   P_MTHD(cmd->push, NVA0C0, SEND_PCAS_A);
+   P_NVA0C0_SEND_PCAS_A(cmd->push, qmd_addr >> 8);
+   P_IMMD(cmd->push, NVA0C0, SEND_SIGNALING_PCAS_B,
+          { .invalidate = INVALIDATE_TRUE,
+            .schedule = SCHEDULE_TRUE });
+}
--- /dev/null
+#include "nvk_private.h"
+#include "nvk_device.h"
+#include "nvk_pipeline.h"
+#include "nvk_pipeline_layout.h"
+#include "nvk_shader.h"
+
+#include "nouveau_bo.h"
+#include "vk_shader_module.h"
+
+#include "drf.h"
+#include "cla0c0qmd.h"
+#include "clc0c0qmd.h"
+#include "clc3c0qmd.h"
+#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
+#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
+#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+
+/* Maps a shared-memory byte requirement to the Volta SM_CONFIG_SHARED_MEM_SIZE
+ * QMD encoding: round up to the next supported carve-out (8/16/32/64/96 KiB),
+ * then encode as (size / 4 KiB) + 1.
+ */
+static int
+gv100_sm_config_smem_size(uint32_t size)
+{
+   if (size > 64 * 1024) size = 96 * 1024;
+   else if (size > 32 * 1024) size = 64 * 1024;
+   else if (size > 16 * 1024) size = 32 * 1024;
+   else if (size > 8 * 1024) size = 16 * 1024;
+   else size = 8 * 1024;
+   return (size / 4096) + 1;
+}
+
+/* Pre-fills the dispatch-invariant fields of a Volta+ (QMD v02_02) launch
+ * descriptor from the compiled shader: shared/local memory sizes, SM shared
+ * memory configuration, CTA dimensions, GPR/barrier counts, and the program
+ * entry address.  The per-dispatch grid size is patched in later by
+ * nvk_CmdDispatch.
+ */
+static void
+gv100_compute_setup_launch_desc_template(uint32_t *qmd,
+                                         struct nvk_shader *shader)
+{
+   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+   NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
+   NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY);
+   /* Shared memory in 256-byte granules. */
+   NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
+                            align(shader->cp.smem_size, 0x100));
+   /* Local memory: the masked hdr[1] field presumably carries the stack size
+    * from the shader header — TODO confirm against the SPH layout.
+    */
+   NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
+                            (shader->hdr[1] & 0xfffff0) +
+                            align(shader->cp.lmem_size, 0x10));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
+   /* Allow the SM to pick any shared-memory carve-out from the minimum
+    * (8 KiB) up to the maximum (96 KiB), targeting what the shader needs.
+    */
+   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(8 * 1024));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(96 * 1024));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(shader->cp.smem_size));
+
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, shader->cp.block_size[0]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, shader->cp.block_size[1]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, shader->cp.block_size[2]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->num_gprs);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, shader->num_barriers);
+
+   /* Program entry point: the shader BO's GPU address. */
+   uint64_t entry = shader->bo->offset;
+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);
+
+}
+
+/* Creates a compute pipeline: compiles the single compute stage through
+ * NIR to the hardware ISA, uploads it, and pre-builds the QMD template used
+ * by every dispatch.  The pipeline cache is currently unused.
+ *
+ * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY / a compile error; on
+ * failure the pipeline object is freed before returning.
+ */
+VkResult
+nvk_compute_pipeline_create(struct nvk_device *device,
+                            struct vk_pipeline_cache *cache,
+                            const VkComputePipelineCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkPipeline *pPipeline)
+{
+   VK_FROM_HANDLE(nvk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
+   struct nvk_physical_device *pdevice = nvk_device_physical(device);
+   struct nvk_compute_pipeline *pipeline;
+   VkResult result;
+
+   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
+                               VK_OBJECT_TYPE_PIPELINE);
+   if (pipeline == NULL)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pipeline->base.type = NVK_PIPELINE_COMPUTE;
+
+   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
+   VK_FROM_HANDLE(vk_shader_module, module, pCreateInfo->stage.module);
+
+   /* NOTE(review): the NIR shader is never freed — it leaks on both the
+    * success path and the fail path after a successful compile-to-NIR.
+    * TODO: ralloc_free(nir) once nvk_compile_nir is done with it.
+    */
+   nir_shader *nir;
+   result = nvk_shader_compile_to_nir(device, module,
+                                      pCreateInfo->stage.pName,
+                                      MESA_SHADER_COMPUTE,
+                                      pCreateInfo->stage.pSpecializationInfo,
+                                      pipeline_layout, &nir);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = nvk_compile_nir(pdevice, nir, &pipeline->base.shaders[MESA_SHADER_COMPUTE]);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   /* NOTE(review): nvk_shader_upload's result is ignored — an upload failure
+    * would leave the QMD template pointing at an invalid BO; TODO check it.
+    */
+   nvk_shader_upload(pdevice, &pipeline->base.shaders[MESA_SHADER_COMPUTE]);
+   gv100_compute_setup_launch_desc_template(pipeline->qmd_template, &pipeline->base.shaders[MESA_SHADER_COMPUTE]);
+   *pPipeline = nvk_pipeline_to_handle(&pipeline->base);
+   return VK_SUCCESS;
+
+fail:
+   vk_object_free(&device->vk, pAllocator, pipeline);
+   return result;
+}
#include "nvk_format.h"
#include "nvk_image.h"
#include "nvk_instance.h"
+#include "nvk_shader.h"
#include "nvk_wsi.h"
#include "vulkan/runtime/vk_device.h"
#include "vulkan/util/vk_enum_defines.h"
#include "vulkan/wsi/wsi_common.h"
+#include "vulkan/util/vk_enum_defines.h"
+
+#include "cl90c0.h"
+#include "cl91c0.h"
+#include "cla0c0.h"
+#include "cla1c0.h"
+#include "clb0c0.h"
+#include "clb1c0.h"
+#include "clc0c0.h"
+#include "clc1c0.h"
+#include "clc3c0.h"
+#include "clc5c0.h"
+
+
VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2 *pFeatures)
unreachable("Graphics pipelines not yet implemented");
}
-static VkResult
-nvk_compute_pipeline_create(struct nvk_device *device,
- struct vk_pipeline_cache *cache,
- const VkComputePipelineCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkPipeline *pPipeline)
-{
- unreachable("Compute pipelines not yet implemented");
-}
-
VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateGraphicsPipelines(VkDevice _device,
VkPipelineCache pipelineCache,
#include "nvk_shader.h"
#include "vk_object.h"
+struct vk_pipeline_cache;
+
enum nvk_pipeline_type {
NVK_PIPELINE_GRAPHICS,
NVK_PIPELINE_COMPUTE,
VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_pipeline, base, VkPipeline,
VK_OBJECT_TYPE_PIPELINE)
+
+/* Compute pipeline: the common pipeline base plus a pre-built 256-byte
+ * (64 x uint32_t) QMD launch-descriptor template that each vkCmdDispatch
+ * copies and patches with the per-dispatch grid size.
+ */
+struct nvk_compute_pipeline {
+   struct nvk_pipeline base;
+
+   uint32_t qmd_template[64];
+};
+
+VkResult
+nvk_compute_pipeline_create(struct nvk_device *device,
+ struct vk_pipeline_cache *cache,
+ const VkComputePipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipeline);
+
#endif