v3dv: implement vkCmdFillBuffer
authorIago Toral Quiroga <itoral@igalia.com>
Wed, 12 Feb 2020 07:25:20 +0000 (08:25 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 13 Oct 2020 21:21:27 +0000 (21:21 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>

src/broadcom/vulkan/v3dv_meta_copy.c

index 82d214c..523f3ce 100644 (file)
@@ -767,3 +767,250 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
     */
    v3dv_job_add_extra_bo(copy_job, src_bo);
 }
+
+static void
+emit_fill_buffer_per_tile_list(struct v3dv_job *job,
+                               struct v3dv_bo *bo,
+                               uint32_t offset,
+                               uint32_t stride)
+{
+   struct v3dv_cl *cl = &job->indirect;
+   v3dv_cl_ensure_space(cl, 200, 1);
+   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+   cl_emit(cl, END_OF_LOADS, end);
+
+   cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
+      fmt.primitive_type = LIST_TRIANGLES;
+   }
+
+   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+      store.buffer_to_store = RENDER_TARGET_0;
+      store.address = v3dv_cl_address(bo, offset);
+      store.clear_buffer_being_stored = false;
+      store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
+      store.memory_format = VC5_TILING_RASTER;
+      store.height_in_ub_or_stride = stride;
+      store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+   }
+
+   cl_emit(cl, END_OF_TILE_MARKER, end);
+
+   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+      branch.start = tile_list_start;
+      branch.end = v3dv_cl_get_address(cl);
+   }
+}
+
+static void
+emit_fill_buffer(struct v3dv_job *job,
+                 uint32_t min_x_supertile,
+                 uint32_t min_y_supertile,
+                 uint32_t max_x_supertile,
+                 uint32_t max_y_supertile,
+                 struct v3dv_bo *bo,
+                 uint32_t offset,
+                 struct v3dv_framebuffer *framebuffer)
+{
+   struct v3dv_cl *rcl = &job->rcl;
+
+   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
+      list.address = v3dv_cl_address(job->tile_alloc, 0);
+   }
+
+   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
+      config.number_of_bin_tile_lists = 1;
+      config.total_frame_width_in_tiles = framebuffer->draw_tiles_x;
+      config.total_frame_height_in_tiles = framebuffer->draw_tiles_y;
+
+      config.supertile_width_in_tiles = framebuffer->supertile_width;
+      config.supertile_height_in_tiles = framebuffer->supertile_height;
+
+      config.total_frame_width_in_supertiles =
+         framebuffer->frame_width_in_supertiles;
+      config.total_frame_height_in_supertiles =
+         framebuffer->frame_height_in_supertiles;
+   }
+
+   /* Implement GFXH-1742 workaround and emit a clear of the tile buffers.
+    * Since we fill by clearing, we need to do the clear here.
+    */
+   for (int i = 0; i < 2; i++) {
+      cl_emit(rcl, TILE_COORDINATES, coords);
+      cl_emit(rcl, END_OF_LOADS, end);
+      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
+         store.buffer_to_store = NONE;
+      }
+      if (i == 0) {
+         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
+            clear.clear_z_stencil_buffer = true;
+            clear.clear_all_render_targets = true;
+         }
+      }
+      cl_emit(rcl, END_OF_TILE_MARKER, end);
+   }
+
+   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
+
+   const uint32_t stride = framebuffer->width * 4;
+   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
+
+   for (int y = min_y_supertile; y <= max_y_supertile; y++) {
+      for (int x = min_x_supertile; x <= max_x_supertile; x++) {
+         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
+            coords.column_number_in_supertiles = x;
+            coords.row_number_in_supertiles = y;
+         }
+      }
+   }
+}
+
+static void
+emit_fill_buffer_rcl(struct v3dv_job *job,
+                     struct v3dv_bo *bo,
+                     uint32_t offset,
+                     struct v3dv_framebuffer *framebuffer,
+                     uint32_t internal_type,
+                     uint32_t data)
+{
+   struct v3dv_cl *rcl = &job->rcl;
+   v3dv_cl_ensure_space_with_branch(rcl, 200 +
+                                    1 * 256 *
+                                    cl_packet_length(SUPERTILE_COORDINATES));
+
+   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
+      config.early_z_disable = true;
+      config.image_width_pixels = framebuffer->width;
+      config.image_height_pixels = framebuffer->height;
+      config.number_of_render_targets = 1;
+      config.multisample_mode_4x = false;
+      config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
+   }
+
+   cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
+      clear.clear_color_low_32_bits = data;
+      clear.clear_color_next_24_bits = 0;
+      clear.render_target_number = 0;
+   };
+
+   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
+      rt.render_target_0_internal_bpp = framebuffer->internal_bpp;
+      rt.render_target_0_internal_type = internal_type;
+      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
+   }
+
+   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
+      clear.z_clear_value = 0;
+      clear.stencil_clear_value = 0;
+   };
+
+   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+      init.use_auto_chained_tile_lists = true;
+      init.size_of_first_block_in_chained_tile_lists =
+         TILE_ALLOCATION_BLOCK_SIZE_64B;
+   }
+
+   uint32_t supertile_w_in_pixels =
+      framebuffer->tile_width * framebuffer->supertile_width;
+   uint32_t supertile_h_in_pixels =
+      framebuffer->tile_height * framebuffer->supertile_height;
+
+   const uint32_t min_x_supertile = 0;
+   const uint32_t min_y_supertile = 0;
+
+   const uint32_t max_x_supertile =
+      (framebuffer->width - 1) / supertile_w_in_pixels;
+   const uint32_t max_y_supertile =
+      (framebuffer->height - 1) / supertile_h_in_pixels;
+
+   emit_fill_buffer(job,
+                    min_x_supertile, min_y_supertile,
+                    max_x_supertile, max_y_supertile,
+                    bo, offset, framebuffer);
+
+   cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+static void
+fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
+            struct v3dv_bo *bo,
+            uint32_t offset,
+            uint32_t size,
+            uint32_t data)
+{
+   assert(size > 0 && size % 4 == 0);
+   assert(offset + size <= bo->size);
+
+   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
+   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
+   uint32_t num_items = size / 4;
+
+   while (num_items > 0) {
+      const uint32_t max_dim_items = 4096;
+      const uint32_t max_items = max_dim_items * max_dim_items;
+      uint32_t width, height;
+      if (num_items > max_items) {
+         width = max_dim_items;
+         height = max_dim_items;
+      } else {
+         width = num_items;
+         height = 1;
+         while (width > max_dim_items ||
+                ((width % 2) == 0 && width > 2 * height)) {
+            width >>= 1;
+            height <<= 1;
+         }
+      }
+      assert(width <= max_dim_items && height <= max_dim_items);
+      assert(width * height <= num_items);
+
+      struct v3dv_framebuffer framebuffer;
+      setup_framebuffer_params(&framebuffer, width, height, 1, internal_bpp);
+
+      struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer, false);
+      v3dv_cmd_buffer_start_frame(cmd_buffer, &framebuffer);
+
+      v3dv_job_emit_binning_flush(job);
+
+      emit_fill_buffer_rcl(job, bo, offset,
+                           &framebuffer, internal_type, data);
+
+      v3dv_cmd_buffer_finish_job(cmd_buffer);
+
+      const uint32_t items_copied = width * height;
+      const uint32_t bytes_copied = items_copied * 4;
+      num_items -= items_copied;
+      offset += bytes_copied;
+   }
+}
+
+void
+v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
+                   VkBuffer dstBuffer,
+                   VkDeviceSize dstOffset,
+                   VkDeviceSize size,
+                   uint32_t data)
+{
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
+
+   struct v3dv_bo *bo = dst_buffer->mem->bo;
+
+   /* From the Vulkan spec:
+    *
+    *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
+    *    a multiple of 4, then the nearest smaller multiple is used."
+    */
+   if (size == VK_WHOLE_SIZE) {
+      size = dst_buffer->mem->bo->size;
+      size -= size % 4;
+   }
+
+   fill_buffer(cmd_buffer, bo, dstOffset, size, data);
+}