vfio/mlx5: Create and destroy page tracker object
authorYishai Hadas <yishaih@nvidia.com>
Thu, 8 Sep 2022 18:34:45 +0000 (21:34 +0300)
committerAlex Williamson <alex.williamson@redhat.com>
Thu, 8 Sep 2022 18:59:01 +0000 (12:59 -0600)
Add support for creating and destroying page tracker object.

This object is used to control/report the device dirty pages.

As part of creating the tracker, we need to consider the device's
capability for the maximum number of ranges and adapt/combine ranges
accordingly.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20220908183448.195262-8-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
drivers/vfio/pci/mlx5/cmd.c
drivers/vfio/pci/mlx5/cmd.h

index 0a36279..f1cad96 100644 (file)
@@ -410,6 +410,148 @@ end:
        return err;
 }
 
+/*
+ * Reduce the number of nodes in @root from @cur_nodes down to @req_nodes so
+ * that the range list fits the device's pg_track_max_num_range capability.
+ * Adjacent nodes separated by the smallest gaps are merged first, keeping the
+ * total covered (and thus tracked) address span as small as possible.
+ *
+ * NOTE(review): assumes the tree is non-empty and cur_nodes > req_nodes >= 1
+ * (the only caller guarantees this); with an empty tree, comb_start would be
+ * NULL and 'last' would be read uninitialized below.
+ */
+static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
+                          u32 req_nodes)
+{
+       struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
+       unsigned long min_gap;
+       unsigned long curr_gap;
+
+       /* Special shortcut when a single range is required */
+       if (req_nodes == 1) {
+               unsigned long last;
+
+               /*
+                * Collapse everything into the first node: remember the end of
+                * the last node, remove all other nodes, then extend the first
+                * node to cover the whole span.  The next node is fetched
+                * before removing 'prev' so iteration stays valid.
+                */
+               curr = comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
+               while (curr) {
+                       last = curr->last;
+                       prev = curr;
+                       curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
+                       if (prev != comb_start)
+                               interval_tree_remove(prev, root);
+               }
+               comb_start->last = last;
+               return;
+       }
+
+       /* Combine ranges which have the smallest gap */
+       while (cur_nodes > req_nodes) {
+               /*
+                * Each pass scans all nodes in ascending order, finds the
+                * adjacent pair with the smallest gap between them, and merges
+                * that pair into one node.  O(cur_nodes - req_nodes) passes.
+                */
+               prev = NULL;
+               min_gap = ULONG_MAX;
+               curr = interval_tree_iter_first(root, 0, ULONG_MAX);
+               while (curr) {
+                       if (prev) {
+                               curr_gap = curr->start - prev->last;
+                               if (curr_gap < min_gap) {
+                                       min_gap = curr_gap;
+                                       comb_start = prev;
+                                       comb_end = curr;
+                               }
+                       }
+                       prev = curr;
+                       curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
+               }
+               /* Absorb comb_end into comb_start and drop it from the tree */
+               comb_start->last = comb_end->last;
+               interval_tree_remove(comb_end, root);
+               cur_nodes--;
+       }
+}
+
+/*
+ * Create the device page-tracker object for dirty-page reporting.
+ *
+ * @mdev:   mlx5 core device used to execute the firmware command
+ * @mvdev:  vfio device whose tracker state (mvdev->tracker) is filled in
+ * @ranges: interval tree of address ranges to track; may be combined in
+ *          place if it exceeds the device's max range count
+ * @nnodes: number of nodes currently in @ranges
+ *
+ * Returns 0 on success and stores the firmware object id in tracker->id;
+ * returns -ENOMEM, -EOPNOTSUPP (address space out of device limits) or the
+ * firmware command error otherwise.
+ */
+static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
+                                struct mlx5vf_pci_core_device *mvdev,
+                                struct rb_root_cached *ranges, u32 nnodes)
+{
+       int max_num_range =
+               MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
+       struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+       int record_size = MLX5_ST_SZ_BYTES(page_track_range);
+       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+       struct interval_tree_node *node = NULL;
+       u64 total_ranges_len = 0;
+       u32 num_ranges = nnodes;
+       u8 log_addr_space_size;
+       void *range_list_ptr;
+       void *obj_context;
+       void *cmd_hdr;
+       int inlen;
+       void *in;
+       int err;
+       int i;
+
+       /* Merge ranges down to what the device can accept in one command */
+       if (num_ranges > max_num_range) {
+               combine_ranges(ranges, nnodes, max_num_range);
+               num_ranges = max_num_range;
+       }
+
+       /* Command layout: fixed header/context plus one record per range */
+       inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
+                                record_size * num_ranges;
+       in = kzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
+                              general_obj_in_cmd_hdr);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
+                MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
+                MLX5_OBJ_TYPE_PAGE_TRACK);
+       obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
+       MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
+       MLX5_SET(page_track, obj_context, track_type, 1);
+       MLX5_SET(page_track, obj_context, log_page_size,
+                ilog2(tracker->host_qp->tracked_page_size));
+       MLX5_SET(page_track, obj_context, log_msg_size,
+                ilog2(tracker->host_qp->max_msg_size));
+       MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
+       MLX5_SET(page_track, obj_context, num_ranges, num_ranges);
+
+       range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
+       node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
+       for (i = 0; i < num_ranges; i++) {
+               void *addr_range_i_base = range_list_ptr + record_size * i;
+               /*
+                * interval_tree 'last' is an inclusive end address, so the
+                * range length is last - start + 1; without the +1 each range
+                * (and the total span below) is short by one byte.
+                */
+               unsigned long length = node->last - node->start + 1;
+
+               MLX5_SET64(page_track_range, addr_range_i_base, start_address,
+                          node->start);
+               MLX5_SET64(page_track_range, addr_range_i_base, length, length);
+               total_ranges_len += length;
+               node = interval_tree_iter_next(node, 0, ULONG_MAX);
+       }
+
+       /* combine_ranges() must have left exactly num_ranges nodes */
+       WARN_ON(node);
+       /* The device bounds the total tracked address space size (log2) */
+       log_addr_space_size = ilog2(total_ranges_len);
+       if (log_addr_space_size <
+           (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
+           log_addr_space_size >
+           (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+
+       MLX5_SET(page_track, obj_context, log_addr_space_size,
+                log_addr_space_size);
+       err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+       if (err)
+               goto out;
+
+       tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+out:
+       kfree(in);
+       return err;
+}
+
+/*
+ * Destroy the page tracker firmware object identified by @tracker_id.
+ * Returns 0 on success or the mlx5_cmd_exec() error code.
+ */
+static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
+                                     u32 tracker_id)
+{
+       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+       u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+
+       /* Generic object-destruction command header for the tracker object */
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+                MLX5_OBJ_TYPE_PAGE_TRACK);
+       MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+                MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
 static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
                             struct mlx5_vhca_cq_buf *buf, int nent,
                             int cqe_size)
@@ -833,6 +975,7 @@ _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
 
        WARN_ON(mvdev->mdev_detach);
 
+       mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
        mlx5vf_destroy_qp(mdev, tracker->fw_qp);
        mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
        mlx5vf_destroy_qp(mdev, tracker->host_qp);
@@ -941,6 +1084,10 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
 
        tracker->host_qp = host_qp;
        tracker->fw_qp = fw_qp;
+       err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
+       if (err)
+               goto err_activate;
+
        *page_size = host_qp->tracked_page_size;
        mvdev->log_active = true;
        mlx5vf_state_mutex_unlock(mvdev);
index e71ec01..658925b 100644 (file)
@@ -80,6 +80,7 @@ struct mlx5_vhca_qp {
 };
 
 struct mlx5_vhca_page_tracker {
+       u32 id;
        u32 pdn;
        struct mlx5_uars_page *uar;
        struct mlx5_vhca_cq cq;