1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 #include "mlx5_vnet.h"
23 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
24 MODULE_DESCRIPTION("Mellanox VDPA driver");
25 MODULE_LICENSE("Dual BSD/GPL");
27 #define VALID_FEATURES_MASK \
28 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
29 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
30 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
31 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
32 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
33 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
34 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
35 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
36 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
37 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
38 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
39 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
40 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 #define VALID_STATUS_MASK \
43 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
44 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 #define MLX5V_UNTAGGED 0x1000
50 struct mlx5_vdpa_cq_buf {
51 struct mlx5_frag_buf_ctrl fbc;
52 struct mlx5_frag_buf frag_buf;
58 struct mlx5_core_cq mcq;
59 struct mlx5_vdpa_cq_buf buf;
64 struct mlx5_vdpa_umem {
65 struct mlx5_frag_buf_ctrl fbc;
66 struct mlx5_frag_buf frag_buf;
72 struct mlx5_core_qp mqp;
73 struct mlx5_frag_buf frag_buf;
79 struct mlx5_vq_restore_info {
91 struct mlx5_vdpa_virtqueue {
98 /* Resources for implementing the notification channel from the device
99 * to the driver. fwqp is the firmware end of an RC connection; the
100 * other end is vqqp used by the driver. cq is where completions are reported.
103 struct mlx5_vdpa_cq cq;
104 struct mlx5_vdpa_qp fwqp;
105 struct mlx5_vdpa_qp vqqp;
107 /* umem resources are required for the virtqueue operation. Their use
108 * is internal and they must be provided by the driver.
110 struct mlx5_vdpa_umem umem1;
111 struct mlx5_vdpa_umem umem2;
112 struct mlx5_vdpa_umem umem3;
118 struct mlx5_vdpa_net *ndev;
124 /* keep last in the struct */
125 struct mlx5_vq_restore_info ri;
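/* Virtqueue index helpers: data VQs occupy the low indices up to
 * mvdev->max_idx, and the control VQ index depends on whether
 * VIRTIO_NET_F_MQ was negotiated (see ctrl_vq_idx() below). These checks
 * reject indices that the negotiated feature set does not allow.
 */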
128 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
130 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
131 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
137 return idx <= mvdev->max_idx;
140 static void free_resources(struct mlx5_vdpa_net *ndev);
141 static void init_mvqs(struct mlx5_vdpa_net *ndev);
142 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
143 static void teardown_driver(struct mlx5_vdpa_net *ndev);
145 static bool mlx5_vdpa_debug;
147 #define MLX5_CVQ_MAX_ENT 16
149 #define MLX5_LOG_VIO_FLAG(_feature) \
151 if (features & BIT_ULL(_feature)) \
152 mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
155 #define MLX5_LOG_VIO_STAT(_status) \
157 if (status & (_status)) \
158 mlx5_vdpa_info(mvdev, "%s\n", #_status); \
161 /* TODO: cross-endian support */
162 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
164 return virtio_legacy_is_little_endian() ||
165 (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
168 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
170 return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
173 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
175 return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
178 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
180 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
183 return mvdev->max_vqs;
186 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
188 return idx == ctrl_vq_idx(mvdev);
191 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
193 if (status & ~VALID_STATUS_MASK)
194 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
195 status & ~VALID_STATUS_MASK);
197 if (!mlx5_vdpa_debug)
200 mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
201 if (set && !status) {
202 mlx5_vdpa_info(mvdev, "driver resets the device\n");
206 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
207 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
208 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
209 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
210 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
211 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
214 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
216 if (features & ~VALID_FEATURES_MASK)
217 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
218 features & ~VALID_FEATURES_MASK);
220 if (!mlx5_vdpa_debug)
223 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
225 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
227 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
252 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
253 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
254 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
255 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
256 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
257 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
258 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
259 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
260 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
263 static int create_tis(struct mlx5_vdpa_net *ndev)
265 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
266 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
270 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
271 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
272 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
274 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
279 static void destroy_tis(struct mlx5_vdpa_net *ndev)
281 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
284 #define MLX5_VDPA_CQE_SIZE 64
285 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
287 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
289 struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
290 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
291 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
294 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
295 ndev->mvdev.mdev->priv.numa_node);
299 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
301 buf->cqe_size = MLX5_VDPA_CQE_SIZE;
307 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
309 struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
311 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
312 ndev->mvdev.mdev->priv.numa_node);
315 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
317 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
320 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
322 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
325 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
327 struct mlx5_cqe64 *cqe64;
331 for (i = 0; i < buf->nent; i++) {
332 cqe = get_cqe(vcq, i);
334 cqe64->op_own = MLX5_CQE_INVALID << 4;
338 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
340 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
342 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
343 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
349 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
352 vqp->db.db[0] = cpu_to_be32(vqp->head);
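/* Fill the create_qp_in mailbox for one side of the notification channel.
 * The firmware-side QP gets a zero-length RQ and no SQ, since firmware
 * chooses its own parameters; the driver-side QP gets an RC receive queue
 * sized to the virtqueue, with its fragment buffer pages listed in the
 * PAS array and completions directed to the virtqueue's CQ.
 */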
355 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
356 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
358 struct mlx5_vdpa_qp *vqp;
362 vqp = fw ? &mvq->fwqp : &mvq->vqqp;
363 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
364 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
366 /* The firmware QP is allocated by the driver for firmware's use, so we
367 * can skip some of the parameters; firmware chooses them itself.
369 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
370 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
371 MLX5_SET(qpc, qpc, no_sq, 1);
375 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
376 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
377 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
378 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
379 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
380 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
381 MLX5_SET(qpc, qpc, no_sq, 1);
382 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
383 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
384 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
385 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
386 mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
389 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
391 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
392 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
393 ndev->mvdev.mdev->priv.numa_node);
396 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
398 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
401 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
402 struct mlx5_vdpa_qp *vqp)
404 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
405 int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
406 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
413 err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
417 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
420 inlen += vqp->frag_buf.npages * sizeof(__be64);
423 in = kzalloc(inlen, GFP_KERNEL);
429 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
430 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
431 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
432 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
433 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
434 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
436 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
437 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
438 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
443 vqp->mqp.uid = ndev->mvdev.res.uid;
444 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
447 rx_post(vqp, mvq->num_ent);
453 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
456 rq_buf_free(ndev, vqp);
461 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
463 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
465 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
466 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
467 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
468 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
469 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
471 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
472 rq_buf_free(ndev, vqp);
476 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
478 return get_sw_cqe(cq, cq->mcq.cons_index);
481 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
483 struct mlx5_cqe64 *cqe64;
485 cqe64 = next_cqe_sw(vcq);
489 vcq->mcq.cons_index++;
493 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
495 struct mlx5_vdpa_net *ndev = mvq->ndev;
496 struct vdpa_callback *event_cb;
498 event_cb = &ndev->event_cbs[mvq->index];
499 mlx5_cq_set_ci(&mvq->cq.mcq);
501 /* Make sure the CQ consumer index update is visible to the hardware
502 * before updating the RX doorbell record.
505 rx_post(&mvq->vqqp, num);
506 if (event_cb->callback)
507 event_cb->callback(event_cb->private);
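/* CQ completion handler: poll CQEs for the virtqueue and, if more than
 * half the ring is consumed while polling, update the doorbell record and
 * invoke the vdpa callback early so completions keep flowing; finally
 * re-arm the CQ.
 */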
510 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
512 struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
513 struct mlx5_vdpa_net *ndev = mvq->ndev;
514 void __iomem *uar_page = ndev->mvdev.res.uar->map;
517 while (!mlx5_vdpa_poll_one(&mvq->cq)) {
519 if (num > mvq->num_ent / 2) {
520 /* If completions keep coming while we poll, we want to
521 * let the hardware know that we consumed them by
522 * updating the doorbell record. We also let the vdpa core
523 * know about this so it passes it on to the virtio driver.
526 mlx5_vdpa_handle_completions(mvq, num);
532 mlx5_vdpa_handle_completions(mvq, num);
534 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
537 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
539 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
540 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
541 void __iomem *uar_page = ndev->mvdev.res.uar->map;
542 u32 out[MLX5_ST_SZ_DW(create_cq_out)];
543 struct mlx5_vdpa_cq *vcq = &mvq->cq;
551 err = mlx5_db_alloc(mdev, &vcq->db);
555 vcq->mcq.set_ci_db = vcq->db.db;
556 vcq->mcq.arm_db = vcq->db.db + 1;
557 vcq->mcq.cqe_sz = 64;
559 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
563 cq_frag_buf_init(vcq, &vcq->buf);
565 inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
566 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
567 in = kzalloc(inlen, GFP_KERNEL);
573 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
574 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
575 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
577 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
578 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
580 /* Use vector 0 by default. Consider adding code to choose the least used vector.
583 err = mlx5_comp_eqn_get(mdev, 0, &eqn);
587 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
588 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
589 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
590 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
591 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
593 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
597 vcq->mcq.comp = mlx5_vdpa_cq_comp;
599 vcq->mcq.set_ci_db = vcq->db.db;
600 vcq->mcq.arm_db = vcq->db.db + 1;
601 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
608 cq_frag_buf_free(ndev, &vcq->buf);
610 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
614 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
616 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
617 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
618 struct mlx5_vdpa_cq *vcq = &mvq->cq;
620 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
621 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
624 cq_frag_buf_free(ndev, &vcq->buf);
625 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
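/* Each virtqueue needs three device-internal umems whose sizes are linear
 * in the queue size: size = param_a * queue_size + param_b. The (a, b)
 * pairs are read here from the device's VDPA emulation HCA capabilities
 * and applied in set_umem_size().
 */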
628 static int read_umem_params(struct mlx5_vdpa_net *ndev)
630 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
631 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
632 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
638 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
639 out = kzalloc(out_size, GFP_KERNEL);
643 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
644 MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
645 err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
647 mlx5_vdpa_warn(&ndev->mvdev,
648 "Failed reading vdpa umem capabilities with err %d\n", err);
652 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
654 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
655 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
657 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
658 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
660 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
661 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
668 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
669 struct mlx5_vdpa_umem **umemp)
676 p_a = ndev->umem_1_buffer_param_a;
677 p_b = ndev->umem_1_buffer_param_b;
678 *umemp = &mvq->umem1;
681 p_a = ndev->umem_2_buffer_param_a;
682 p_b = ndev->umem_2_buffer_param_b;
683 *umemp = &mvq->umem2;
686 p_a = ndev->umem_3_buffer_param_a;
687 p_b = ndev->umem_3_buffer_param_b;
688 *umemp = &mvq->umem3;
692 (*umemp)->size = p_a * mvq->num_ent + p_b;
695 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
697 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
700 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
703 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
708 struct mlx5_vdpa_umem *umem;
710 set_umem_size(ndev, mvq, num, &umem);
711 err = umem_frag_buf_alloc(ndev, umem, umem->size);
715 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
717 in = kzalloc(inlen, GFP_KERNEL);
723 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
724 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
725 um = MLX5_ADDR_OF(create_umem_in, in, umem);
726 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
727 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
729 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
730 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
732 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
734 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
739 umem->id = MLX5_GET(create_umem_out, out, umem_id);
746 umem_frag_buf_free(ndev, umem);
750 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
752 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
753 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
754 struct mlx5_vdpa_umem *umem;
768 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
769 MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
770 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
773 umem_frag_buf_free(ndev, umem);
776 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
781 for (num = 1; num <= 3; num++) {
782 err = create_umem(ndev, mvq, num);
789 for (num--; num > 0; num--)
790 umem_destroy(ndev, mvq, num);
795 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
799 for (num = 3; num > 0; num--)
800 umem_destroy(ndev, mvq, num);
803 static int get_queue_type(struct mlx5_vdpa_net *ndev)
807 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
809 /* prefer split queue */
810 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
811 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
813 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
815 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
818 static bool vq_is_tx(u16 idx)
824 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
825 MLX5_VIRTIO_NET_F_HOST_ECN = 4,
826 MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
827 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
828 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
829 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
830 MLX5_VIRTIO_NET_F_CSUM = 10,
831 MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
832 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
835 static u16 get_features(u64 features)
837 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
838 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
839 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
840 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
841 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
842 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
843 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
844 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
847 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
849 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
850 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
853 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
855 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
856 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
857 pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
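/* Create the VIRTIO_NET_Q general object. The object carries the saved
 * hw available/used indices, the translated feature bits, the event mode
 * (an MSI-X vector when one was allocated, otherwise the firmware QP of
 * the notification channel), the ring addresses and mkey, the three umems
 * and, when supported, the queue counter set.
 */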
860 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
862 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
863 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
871 err = umems_create(ndev, mvq);
875 in = kzalloc(inlen, GFP_KERNEL);
881 mlx_features = get_features(ndev->mvdev.actual_features);
882 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
884 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
885 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
886 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
888 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
889 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
890 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
891 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
893 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
895 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
896 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
898 if (vq_is_tx(mvq->index))
899 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
902 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
903 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
905 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
906 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
909 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
910 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
911 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
912 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
913 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
914 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
915 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
916 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
917 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
918 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
919 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
920 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
921 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
922 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
923 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
924 if (counters_supported(&ndev->mvdev))
925 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
927 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
931 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
933 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
940 umems_destroy(ndev, mvq);
944 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
946 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
947 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
949 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
950 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
951 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
952 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
953 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
954 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
955 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
956 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
959 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
960 umems_destroy(ndev, mvq);
963 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
965 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
968 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
970 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
973 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
974 int *outlen, u32 qpn, u32 rqpn)
980 case MLX5_CMD_OP_2RST_QP:
981 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
982 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
983 *in = kzalloc(*inlen, GFP_KERNEL);
984 *out = kzalloc(*outlen, GFP_KERNEL);
988 MLX5_SET(qp_2rst_in, *in, opcode, cmd);
989 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
990 MLX5_SET(qp_2rst_in, *in, qpn, qpn);
992 case MLX5_CMD_OP_RST2INIT_QP:
993 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
994 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
995 *in = kzalloc(*inlen, GFP_KERNEL);
996 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1000 MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1001 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1002 MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1003 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1004 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1005 MLX5_SET(qpc, qpc, rwe, 1);
1006 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1007 MLX5_SET(ads, pp, vhca_port_num, 1);
1009 case MLX5_CMD_OP_INIT2RTR_QP:
1010 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1011 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1012 *in = kzalloc(*inlen, GFP_KERNEL);
1013 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1017 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1018 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1019 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1020 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1021 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1022 MLX5_SET(qpc, qpc, log_msg_max, 30);
1023 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1024 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1025 MLX5_SET(ads, pp, fl, 1);
1027 case MLX5_CMD_OP_RTR2RTS_QP:
1028 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1029 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1030 *in = kzalloc(*inlen, GFP_KERNEL);
1031 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1035 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1036 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1037 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1038 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1039 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1040 MLX5_SET(ads, pp, ack_timeout, 14);
1041 MLX5_SET(qpc, qpc, retry_count, 7);
1042 MLX5_SET(qpc, qpc, rnr_retry, 7);
1045 goto outerr_nullify;
1058 static void free_inout(void *in, void *out)
1064 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1065 * firmware. The fw argument indicates whether the QP being modified is the one used by firmware.
1068 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1076 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1080 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1081 free_inout(in, out);
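/* Bring the two QPs of the notification channel to a connected state by
 * walking both through reset, then RST2INIT and INIT2RTR, and finally
 * moving the firmware-side QP to RTS.
 */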
1085 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1089 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1093 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1097 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1101 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1105 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1109 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1113 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1116 struct mlx5_virtq_attr {
1118 u16 available_index;
1122 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1123 struct mlx5_virtq_attr *attr)
1125 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1126 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1132 out = kzalloc(outlen, GFP_KERNEL);
1136 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1138 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1139 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1140 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1141 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1142 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1146 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1147 memset(attr, 0, sizeof(*attr));
1148 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1149 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1150 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
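/* Firmware virtqueue objects follow a simple state machine:
 * INIT -> RDY -> SUSPEND. modify_virtqueue() only issues the state-change
 * command for transitions this helper accepts.
 */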
1159 static bool is_valid_state_change(int oldstate, int newstate)
1162 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1163 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1164 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1165 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1166 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1167 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1173 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1175 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1176 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1182 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1185 if (!is_valid_state_change(mvq->fw_state, state))
1188 in = kzalloc(inlen, GFP_KERNEL);
1192 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1194 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1195 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1196 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1197 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1199 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1200 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1201 MLX5_VIRTQ_MODIFY_MASK_STATE);
1202 MLX5_SET(virtio_net_q_object, obj_context, state, state);
1203 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1206 mvq->fw_state = state;
1211 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1213 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1214 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1218 if (!counters_supported(&ndev->mvdev))
1221 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1223 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1224 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1225 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1227 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1231 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1236 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1238 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1239 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1241 if (!counters_supported(&ndev->mvdev))
1244 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1245 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1246 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1247 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1248 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1249 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1252 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1254 struct vdpa_callback *cb = priv;
1257 return cb->callback(cb->private);
1262 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1263 struct mlx5_vdpa_virtqueue *mvq)
1265 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1266 struct mlx5_vdpa_irq_pool_entry *ent;
1270 for (i = 0; i < irqp->num_ent; i++) {
1271 ent = &irqp->entries[i];
1273 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1274 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1275 ent->dev_id = &ndev->event_cbs[mvq->index];
1276 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1277 ent->name, ent->dev_id);
1282 mvq->map = ent->map;
1288 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1289 struct mlx5_vdpa_virtqueue *mvq)
1291 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1294 for (i = 0; i < irqp->num_ent; i++)
1295 if (mvq->map.virq == irqp->entries[i].map.virq) {
1296 free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1297 irqp->entries[i].used = false;
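/* Set up all resources backing one virtqueue, in dependency order: CQ,
 * firmware and driver QPs, QP connection, counter set, MSI-X vector (when
 * available) and the virtqueue object itself, which is then moved to RDY.
 * On failure, everything created so far is torn down in reverse order.
 */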
1302 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1304 u16 idx = mvq->index;
1310 if (mvq->initialized)
1313 err = cq_create(ndev, idx, mvq->num_ent);
1317 err = qp_create(ndev, mvq, &mvq->fwqp);
1321 err = qp_create(ndev, mvq, &mvq->vqqp);
1325 err = connect_qps(ndev, mvq);
1329 err = counter_set_alloc(ndev, mvq);
1333 alloc_vector(ndev, mvq);
1334 err = create_virtqueue(ndev, mvq);
1339 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1341 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1347 mvq->initialized = true;
1351 destroy_virtqueue(ndev, mvq);
1353 dealloc_vector(ndev, mvq);
1354 counter_set_dealloc(ndev, mvq);
1356 qp_destroy(ndev, &mvq->vqqp);
1358 qp_destroy(ndev, &mvq->fwqp);
1360 cq_destroy(ndev, idx);
1364 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1366 struct mlx5_virtq_attr attr;
1368 if (!mvq->initialized)
1371 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1374 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1375 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1377 if (query_virtqueue(ndev, mvq, &attr)) {
1378 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1381 mvq->avail_idx = attr.available_index;
1382 mvq->used_idx = attr.used_index;
1385 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1389 for (i = 0; i < ndev->mvdev.max_vqs; i++)
1390 suspend_vq(ndev, &ndev->vqs[i]);
1393 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1395 if (!mvq->initialized)
1398 suspend_vq(ndev, mvq);
1399 destroy_virtqueue(ndev, mvq);
1400 dealloc_vector(ndev, mvq);
1401 counter_set_dealloc(ndev, mvq);
1402 qp_destroy(ndev, &mvq->vqqp);
1403 qp_destroy(ndev, &mvq->fwqp);
1404 cq_destroy(ndev, mvq->index);
1405 mvq->initialized = false;
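/* The RQT lists the receive (even-indexed) virtqueue IDs. Its maximum
 * size is rqt_size rounded up to a power of two, and its actual size is
 * half of the currently enabled virtqueues (one RX queue per queue pair),
 * also rounded up to a power of two.
 */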
1408 static int create_rqt(struct mlx5_vdpa_net *ndev)
1410 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1411 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1419 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1420 in = kzalloc(inlen, GFP_KERNEL);
1424 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1425 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1427 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1428 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1429 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1430 for (i = 0, j = 0; i < act_sz; i++, j += 2)
1431 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1433 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1434 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1442 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1444 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1446 int act_sz = roundup_pow_of_two(num / 2);
1454 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1455 in = kzalloc(inlen, GFP_KERNEL);
1459 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1460 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1461 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1462 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1464 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1465 for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1466 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1468 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1469 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1477 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1479 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1482 static int create_tir(struct mlx5_vdpa_net *ndev)
1484 #define HASH_IP_L4PORTS \
1485 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
1486 MLX5_HASH_FIELD_SEL_L4_DPORT)
1487 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1488 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1489 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1490 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1491 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1498 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1502 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1503 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1504 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1506 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1507 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1508 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1509 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1511 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1512 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1513 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1514 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1516 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1517 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1519 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1524 mlx5_vdpa_add_tirn(ndev);
1528 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1530 mlx5_vdpa_remove_tirn(ndev);
1531 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1534 #define MAX_STEERING_ENT 0x8000
1535 #define MAX_STEERING_GROUPS 2
1537 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1543 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1544 struct macvlan_node *node,
1545 struct mlx5_flow_act *flow_act,
1546 struct mlx5_flow_destination *dests)
1548 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1551 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1552 if (IS_ERR(node->ucast_counter.counter))
1553 return PTR_ERR(node->ucast_counter.counter);
1555 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1556 if (IS_ERR(node->mcast_counter.counter)) {
1557 err = PTR_ERR(node->mcast_counter.counter);
1558 goto err_mcast_counter;
1561 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1562 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1566 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1573 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1574 struct macvlan_node *node)
1576 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1577 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1578 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1582 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1583 struct macvlan_node *node)
1585 struct mlx5_flow_destination dests[NUM_DESTS] = {};
1586 struct mlx5_flow_act flow_act = {};
1587 struct mlx5_flow_spec *spec;
1595 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1599 vid = key2vid(node->macvlan);
1600 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1601 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1602 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1603 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1604 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1605 eth_broadcast_addr(dmac_c);
1606 ether_addr_copy(dmac_v, mac);
1607 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1608 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1609 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1612 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1613 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1615 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1616 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1617 dests[0].tir_num = ndev->res.tirn;
1618 err = add_steering_counters(ndev, node, &flow_act, dests);
1622 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1623 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1625 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1626 if (IS_ERR(node->ucast_rule)) {
1627 err = PTR_ERR(node->ucast_rule);
1631 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1632 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1635 memset(dmac_c, 0, ETH_ALEN);
1636 memset(dmac_v, 0, ETH_ALEN);
1639 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1640 if (IS_ERR(node->mcast_rule)) {
1641 err = PTR_ERR(node->mcast_rule);
1645 mlx5_vdpa_add_rx_counters(ndev, node);
1649 mlx5_del_flow_rules(node->ucast_rule);
1651 remove_steering_counters(ndev, node);
1657 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1658 struct macvlan_node *node)
1660 mlx5_vdpa_remove_rx_counters(ndev, node);
1661 mlx5_del_flow_rules(node->ucast_rule);
1662 mlx5_del_flow_rules(node->mcast_rule);
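/* MAC/VLAN steering entries are keyed by a single 64-bit value that packs
 * the VLAN ID (or MLX5V_UNTAGGED for untagged traffic) into the upper
 * bits together with the MAC address, and are kept in the macvlan_hash
 * table.
 */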
1665 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1670 vlan = MLX5V_UNTAGGED;
1672 val = (u64)vlan << 48 |
1683 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1685 struct macvlan_node *pos;
1688 idx = hash_64(value, 8); /* 8-bit hash index into macvlan_hash */
1689 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1690 if (pos->macvlan == value)
1696 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1698 struct macvlan_node *ptr;
1703 val = search_val(mac, vid, tagged);
1704 if (mac_vlan_lookup(ndev, val))
1707 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1711 ptr->tagged = tagged;
1714 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1718 idx = hash_64(val, 8);
1719 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1727 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1729 struct macvlan_node *ptr;
1731 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1735 hlist_del(&ptr->hlist);
1736 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1737 remove_steering_counters(ndev, ptr);
1741 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1743 struct macvlan_node *pos;
1744 struct hlist_node *n;
1747 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1748 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1749 hlist_del(&pos->hlist);
1750 mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1751 remove_steering_counters(ndev, pos);
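/* Create an auto-grouped RX flow table in the bypass namespace and
 * install the default untagged unicast rule for the configured MAC.
 * Additional MAC/VLAN rules are added and removed through the macvlan
 * hash table above as the guest programs filters over the control VQ.
 */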
1757 static int setup_steering(struct mlx5_vdpa_net *ndev)
1759 struct mlx5_flow_table_attr ft_attr = {};
1760 struct mlx5_flow_namespace *ns;
1763 ft_attr.max_fte = MAX_STEERING_ENT;
1764 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1766 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1768 mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1772 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1773 if (IS_ERR(ndev->rxft)) {
1774 mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1775 return PTR_ERR(ndev->rxft);
1777 mlx5_vdpa_add_rx_flow_table(ndev);
1779 err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1786 mlx5_vdpa_remove_rx_flow_table(ndev);
1787 mlx5_destroy_flow_table(ndev->rxft);
1791 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1793 clear_mac_vlan_table(ndev);
1794 mlx5_vdpa_remove_rx_flow_table(ndev);
1795 mlx5_destroy_flow_table(ndev->rxft);
1798 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1800 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1801 struct mlx5_control_vq *cvq = &mvdev->cvq;
1802 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1803 struct mlx5_core_dev *pfmdev;
1805 u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1807 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1809 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1810 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1811 if (read != ETH_ALEN)
1814 if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1815 status = VIRTIO_NET_OK;
1819 if (is_zero_ether_addr(mac))
1822 if (!is_zero_ether_addr(ndev->config.mac)) {
1823 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1824 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1830 if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1831 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1836 /* Back up the original MAC address so that we can restore it if
1837 * adding the forward rules fails.
1839 memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1841 memcpy(ndev->config.mac, mac, ETH_ALEN);
1843 /* Recreate the flow table entry so that packets can be forwarded back
1845 mac_vlan_del(ndev, mac_back, 0, false);
1847 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1848 mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1850 /* Although this path is rarely taken, we still need to double-check */
1851 if (is_zero_ether_addr(mac_back)) {
1852 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1856 /* Try to restore the original MAC address to the MPFS table, and try to
1857 * restore the forward rule entry.
1859 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1860 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1864 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1865 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1869 memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1871 if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1872 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1877 status = VIRTIO_NET_OK;
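/* Change the number of active queue pairs. When shrinking, the RQT is
 * reduced first and the now-unused virtqueues are torn down afterwards;
 * when growing, the new virtqueues are set up before the RQT is enlarged,
 * and the added queues are rolled back if either step fails.
 */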
1887 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1889 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1890 int cur_qps = ndev->cur_num_vqs / 2;
1894 if (cur_qps > newqps) {
1895 err = modify_rqt(ndev, 2 * newqps);
1899 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1900 teardown_vq(ndev, &ndev->vqs[i]);
1902 ndev->cur_num_vqs = 2 * newqps;
1904 ndev->cur_num_vqs = 2 * newqps;
1905 for (i = cur_qps * 2; i < 2 * newqps; i++) {
1906 err = setup_vq(ndev, &ndev->vqs[i]);
1910 err = modify_rqt(ndev, 2 * newqps);
1917 for (--i; i >= 2 * cur_qps; --i)
1918 teardown_vq(ndev, &ndev->vqs[i]);
1920 ndev->cur_num_vqs = 2 * cur_qps;
1925 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1927 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1928 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1929 struct mlx5_control_vq *cvq = &mvdev->cvq;
1930 struct virtio_net_ctrl_mq mq;
1935 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1936 /* This mq feature check aligns with the pre-existing userspace implementation.
1939 * Without it, an untrusted driver could fake a multiqueue config
1940 * request down to a non-mq device, which may cause the kernel to
1941 * panic due to uninitialized resources for the extra vqs. Even with
1942 * a well-behaved guest driver, it is not expected to allow
1943 * changing the number of vqs on a non-mq device.
1945 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1948 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1949 if (read != sizeof(mq))
1952 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1953 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1954 newqps > ndev->rqt_size)
1957 if (ndev->cur_num_vqs == 2 * newqps) {
1958 status = VIRTIO_NET_OK;
1962 if (!change_num_qps(mvdev, newqps))
1963 status = VIRTIO_NET_OK;
1973 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1975 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1976 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1977 struct mlx5_control_vq *cvq = &mvdev->cvq;
1982 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1986 case VIRTIO_NET_CTRL_VLAN_ADD:
1987 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1988 if (read != sizeof(vlan))
1991 id = mlx5vdpa16_to_cpu(mvdev, vlan);
1992 if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1995 status = VIRTIO_NET_OK;
1997 case VIRTIO_NET_CTRL_VLAN_DEL:
1998 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1999 if (read != sizeof(vlan))
2002 id = mlx5vdpa16_to_cpu(mvdev, vlan);
2003 mac_vlan_del(ndev, ndev->config.mac, id, true);
2004 status = VIRTIO_NET_OK;
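/* The control virtqueue is emulated in software. Kicks are deferred to a
 * workqueue; this handler pulls requests through vringh over the IOTLB,
 * dispatches on the control class (MAC, MQ, VLAN), pushes the status byte
 * back to the driver and requeues itself while descriptors remain, all
 * under the reslock write lock.
 */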
2013 static void mlx5_cvq_kick_handler(struct work_struct *work)
2015 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2016 struct virtio_net_ctrl_hdr ctrl;
2017 struct mlx5_vdpa_wq_ent *wqent;
2018 struct mlx5_vdpa_dev *mvdev;
2019 struct mlx5_control_vq *cvq;
2020 struct mlx5_vdpa_net *ndev;
2024 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2025 mvdev = wqent->mvdev;
2026 ndev = to_mlx5_vdpa_ndev(mvdev);
2029 down_write(&ndev->reslock);
2031 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2034 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2041 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2046 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2047 if (read != sizeof(ctrl))
2050 cvq->received_desc++;
2051 switch (ctrl.class) {
2052 case VIRTIO_NET_CTRL_MAC:
2053 status = handle_ctrl_mac(mvdev, ctrl.cmd);
2055 case VIRTIO_NET_CTRL_MQ:
2056 status = handle_ctrl_mq(mvdev, ctrl.cmd);
2058 case VIRTIO_NET_CTRL_VLAN:
2059 status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2065 /* Make sure data is written before advancing index */
2068 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2069 vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2070 vringh_kiov_cleanup(&cvq->riov);
2071 vringh_kiov_cleanup(&cvq->wiov);
2073 if (vringh_need_notify_iotlb(&cvq->vring))
2074 vringh_notify(&cvq->vring);
2076 cvq->completed_desc++;
2077 queue_work(mvdev->wq, &wqent->work);
2082 up_write(&ndev->reslock);
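/* Kick handling: control VQ kicks are routed to the workqueue handler
 * above, while data VQ kicks are written directly to the device's kick
 * doorbell.
 */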
2085 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2087 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2088 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2089 struct mlx5_vdpa_virtqueue *mvq;
2091 if (!is_index_valid(mvdev, idx))
2094 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2095 if (!mvdev->wq || !mvdev->cvq.ready)
2098 queue_work(mvdev->wq, &ndev->cvq_ent.work);
2102 mvq = &ndev->vqs[idx];
2103 if (unlikely(!mvq->ready))
2106 iowrite16(idx, ndev->mvdev.res.kick_addr);
2109 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2110 u64 driver_area, u64 device_area)
2112 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2113 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2114 struct mlx5_vdpa_virtqueue *mvq;
2116 if (!is_index_valid(mvdev, idx))
2119 if (is_ctrl_vq_idx(mvdev, idx)) {
2120 mvdev->cvq.desc_addr = desc_area;
2121 mvdev->cvq.device_addr = device_area;
2122 mvdev->cvq.driver_addr = driver_area;
2126 mvq = &ndev->vqs[idx];
2127 mvq->desc_addr = desc_area;
2128 mvq->device_addr = device_area;
2129 mvq->driver_addr = driver_area;
2133 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2135 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2136 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2137 struct mlx5_vdpa_virtqueue *mvq;
2139 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2142 mvq = &ndev->vqs[idx];
2146 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2148 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2149 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2151 ndev->event_cbs[idx] = *cb;
2152 if (is_ctrl_vq_idx(mvdev, idx))
2153 mvdev->cvq.event_cb = *cb;
2156 static void mlx5_cvq_notify(struct vringh *vring)
2158 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2160 if (!cvq->event_cb.callback)
2163 cvq->event_cb.callback(cvq->event_cb.private);
2166 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2168 struct mlx5_control_vq *cvq = &mvdev->cvq;
2174 cvq->vring.notify = mlx5_cvq_notify;
2177 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2179 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2180 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2181 struct mlx5_vdpa_virtqueue *mvq;
2184 if (!mvdev->actual_features)
2187 if (!is_index_valid(mvdev, idx))
2190 if (is_ctrl_vq_idx(mvdev, idx)) {
2191 set_cvq_ready(mvdev, ready);
2195 mvq = &ndev->vqs[idx];
2197 suspend_vq(ndev, mvq);
2199 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2201 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2210 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2212 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2213 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2215 if (!is_index_valid(mvdev, idx))
2218 if (is_ctrl_vq_idx(mvdev, idx))
2219 return mvdev->cvq.ready;
2221 return ndev->vqs[idx].ready;
2224 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2225 const struct vdpa_vq_state *state)
2227 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2228 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2229 struct mlx5_vdpa_virtqueue *mvq;
2231 if (!is_index_valid(mvdev, idx))
2234 if (is_ctrl_vq_idx(mvdev, idx)) {
2235 mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2239 mvq = &ndev->vqs[idx];
2240 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2241 mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2245 mvq->used_idx = state->split.avail_index;
2246 mvq->avail_idx = state->split.avail_index;
2250 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2252 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2253 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2254 struct mlx5_vdpa_virtqueue *mvq;
2255 struct mlx5_virtq_attr attr;
2258 if (!is_index_valid(mvdev, idx))
2261 if (is_ctrl_vq_idx(mvdev, idx)) {
2262 state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2266 mvq = &ndev->vqs[idx];
2267 /* If the virtq object was destroyed, use the value saved at
2268 * the last minute of suspend_vq. This caters for userspace
2269 * that cares about emulating the index after vq is stopped.
2271 if (!mvq->initialized) {
2272 /* Firmware returns a wrong value for the available index.
2273 * Since both values should be identical, we take the value of
2274 * used_idx which is reported correctly.
2276 state->split.avail_index = mvq->used_idx;
2280 err = query_virtqueue(ndev, mvq, &attr);
2282 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2285 state->split.avail_index = attr.used_index;
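/* Illustrative sketch, not part of this driver: a vdpa bus driver such as
 * vhost-vdpa is expected to save and restore ring state around a stop/start
 * cycle roughly like this (error handling elided, the exact flow is an
 * assumption about the caller):
 *
 *	struct vdpa_vq_state state;
 *
 *	ops->get_vq_state(vdev, idx, &state);	// after the VQ is suspended
 *	...					// reset / remap / migrate
 *	ops->set_vq_state(vdev, idx, &state);	// before making the VQ ready
 */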
2289 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2294 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2296 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2298 if (is_ctrl_vq_idx(mvdev, idx))
2299 return MLX5_VDPA_CVQ_GROUP;
2301 return MLX5_VDPA_DATAVQ_GROUP;
2304 static u64 mlx_to_virtio_features(u16 dev_features)
2308 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2309 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2310 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2311 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2312 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2313 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2314 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2315 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2316 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2317 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2318 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2319 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2320 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2321 result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2322 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2323 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2324 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2325 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2330 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2332 u64 mlx_vdpa_features = 0;
2335 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2336 mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2337 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2338 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2339 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2340 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2341 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2342 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2343 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2344 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2345 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2346 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2348 return mlx_vdpa_features;
2351 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2353 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2354 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2356 print_features(mvdev, ndev->mvdev.mlx_features, false);
2357 return ndev->mvdev.mlx_features;
2360 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2362 /* Minimum features to expect */
2363 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2366 /* Double-check the feature combination sent down by the driver.
2367 * Fail feature sets that lack a feature they depend on.
2369 * Per the VIRTIO v1.1 specification, section 5.1.3.1 "Feature bit
2370 * requirements": "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2371 * By rejecting invalid feature sets sent down by untrusted drivers,
2372 * we ensure the assumptions made by is_index_valid() and
2373 * is_ctrl_vq_idx() are not compromised.
2375 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2376 BIT_ULL(VIRTIO_NET_F_MQ))
2382 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2384 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2388 for (i = 0; i < mvdev->max_vqs; i++) {
2389 err = setup_vq(ndev, &ndev->vqs[i]);
2397 for (--i; i >= 0; i--)
2398 teardown_vq(ndev, &ndev->vqs[i]);
2403 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2405 struct mlx5_vdpa_virtqueue *mvq;
2408 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2409 mvq = &ndev->vqs[i];
2410 if (!mvq->initialized)
2413 teardown_vq(ndev, mvq);
2417 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2419 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2420 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2421 /* MQ supported. CVQ index is right above the last data virtqueue's */
2422 mvdev->max_idx = mvdev->max_vqs;
2424 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
2430 /* Two data virtqueues only: one for rx and one for tx */
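/* Resulting index layout, for illustration:
 *  - CTRL_VQ and MQ negotiated: data VQs use indices 0..max_vqs - 1 and the
 *    CVQ sits right above them at index max_vqs.
 *  - CTRL_VQ only:              data VQs use indices 0 and 1, CVQ is index 2.
 *  - neither:                   only the two data VQs, indices 0 and 1.
 */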
2435 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2437 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2438 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2441 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2442 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2443 MLX5_SET(query_vport_state_in, in, vport_number, vport);
2445 MLX5_SET(query_vport_state_in, in, other_vport, 1);
2447 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2451 return MLX5_GET(query_vport_state_out, out, state);
2454 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2456 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2463 static void update_carrier(struct work_struct *work)
2465 struct mlx5_vdpa_wq_ent *wqent;
2466 struct mlx5_vdpa_dev *mvdev;
2467 struct mlx5_vdpa_net *ndev;
2469 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2470 mvdev = wqent->mvdev;
2471 ndev = to_mlx5_vdpa_ndev(mvdev);
2472 if (get_link_state(mvdev))
2473 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2475 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2477 if (ndev->config_cb.callback)
2478 ndev->config_cb.callback(ndev->config_cb.private);
2483 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2485 struct mlx5_vdpa_wq_ent *wqent;
2487 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2491 wqent->mvdev = &ndev->mvdev;
2492 INIT_WORK(&wqent->work, update_carrier);
2493 queue_work(ndev->mvdev.wq, &wqent->work);
2497 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2499 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2500 struct mlx5_eqe *eqe = param;
2501 int ret = NOTIFY_DONE;
2503 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2504 switch (eqe->sub_type) {
2505 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2506 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2507 if (queue_link_work(ndev))
2520 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2522 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2525 ndev->nb.notifier_call = event_handler;
2526 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2527 ndev->nb_registered = true;
2528 queue_link_work(ndev);
2531 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2533 if (!ndev->nb_registered)
2536 ndev->nb_registered = false;
2537 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2539 flush_workqueue(ndev->mvdev.wq);
2542 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2544 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2545 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2548 print_features(mvdev, features, true);
2550 err = verify_driver_features(mvdev, features);
2554 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2555 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2556 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2560 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2561 * 5.1.6.5.5 "Device operation in multiqueue mode":
2563 * Multiqueue is disabled by default.
2564 * The driver enables multiqueue by sending a command using class
2565 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2566 * operation, as follows: ...
2568 ndev->cur_num_vqs = 2;
2570 update_cvq_info(mvdev);
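/* Illustrative sketch, not code in this driver: once DRIVER_OK is set, the
 * guest driver can grow the number of active queue pairs by sending the
 * standard virtio-net control command over the CVQ (names from
 * uapi/linux/virtio_net.h):
 *
 *	struct virtio_net_ctrl_mq mq = {
 *		.virtqueue_pairs = cpu_to_virtio16(vdev, requested_pairs),
 *	};
 *	// class VIRTIO_NET_CTRL_MQ, command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
 *
 * On this side the command is consumed by the CVQ kick handler
 * (mlx5_cvq_kick_handler, registered further below).
 */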
2574 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2576 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2577 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2579 ndev->config_cb = *cb;
2582 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2583 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2585 return MLX5_VDPA_MAX_VQ_ENTRIES;
2588 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2590 return VIRTIO_ID_NET;
2593 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2595 return PCI_VENDOR_ID_MELLANOX;
2598 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2600 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2601 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2603 print_status(mvdev, ndev->mvdev.status, false);
2604 return ndev->mvdev.status;
2607 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2609 struct mlx5_vq_restore_info *ri = &mvq->ri;
2610 struct mlx5_virtq_attr attr = {};
2613 if (mvq->initialized) {
2614 err = query_virtqueue(ndev, mvq, &attr);
2619 ri->avail_index = attr.available_index;
2620 ri->used_index = attr.used_index;
2621 ri->ready = mvq->ready;
2622 ri->num_ent = mvq->num_ent;
2623 ri->desc_addr = mvq->desc_addr;
2624 ri->device_addr = mvq->device_addr;
2625 ri->driver_addr = mvq->driver_addr;
2631 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2635 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2636 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2637 save_channel_info(ndev, &ndev->vqs[i]);
2642 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2646 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2647 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2650 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2652 struct mlx5_vdpa_virtqueue *mvq;
2653 struct mlx5_vq_restore_info *ri;
2656 mlx5_clear_vqs(ndev);
2658 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2659 mvq = &ndev->vqs[i];
2664 mvq->avail_idx = ri->avail_index;
2665 mvq->used_idx = ri->used_index;
2666 mvq->ready = ri->ready;
2667 mvq->num_ent = ri->num_ent;
2668 mvq->desc_addr = ri->desc_addr;
2669 mvq->device_addr = ri->device_addr;
2670 mvq->driver_addr = ri->driver_addr;
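/* Map changes can arrive while virtqueue objects already exist in hardware,
 * so mlx5_vdpa_change_map() below follows a save/teardown/restore sequence:
 * save_channels_info(), tear down the driver objects, rebuild the MR for the
 * new iotlb, then restore_channels_info() and setup_driver() if the device
 * was running (DRIVER_OK set and not suspended).
 */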
2675 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2676 struct vhost_iotlb *iotlb, unsigned int asid)
2678 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2682 err = save_channels_info(ndev);
2686 teardown_driver(ndev);
2687 mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2688 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
2692 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2695 restore_channels_info(ndev);
2696 err = setup_driver(mvdev);
2703 mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2708 /* reslock must be held for this function */
2709 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2711 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2714 WARN_ON(!rwsem_is_locked(&ndev->reslock));
2717 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2721 mlx5_vdpa_add_debugfs(ndev);
2723 err = read_umem_params(ndev);
2727 err = setup_virtqueues(mvdev);
2729 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2733 err = create_rqt(ndev);
2735 mlx5_vdpa_warn(mvdev, "create_rqt\n");
2739 err = create_tir(ndev);
2741 mlx5_vdpa_warn(mvdev, "create_tir\n");
2745 err = setup_steering(ndev);
2747 mlx5_vdpa_warn(mvdev, "setup_steering\n");
2759 teardown_virtqueues(ndev);
2761 mlx5_vdpa_remove_debugfs(ndev);
2766 /* reslock must be held for this function */
2767 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2770 WARN_ON(!rwsem_is_locked(&ndev->reslock));
2775 mlx5_vdpa_remove_debugfs(ndev);
2776 teardown_steering(ndev);
2779 teardown_virtqueues(ndev);
2780 ndev->setup = false;
2783 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2787 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2788 ndev->vqs[i].ready = false;
2790 ndev->mvdev.cvq.ready = false;
2793 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2795 struct mlx5_control_vq *cvq = &mvdev->cvq;
2798 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2799 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2800 MLX5_CVQ_MAX_ENT, false,
2801 (struct vring_desc *)(uintptr_t)cvq->desc_addr,
2802 (struct vring_avail *)(uintptr_t)cvq->driver_addr,
2803 (struct vring_used *)(uintptr_t)cvq->device_addr);
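/* The CVQ ring is accessed through vringh on top of the device iotlb, so the
 * descriptor/driver/device addresses programmed via set_vq_address are
 * treated as IOVAs translated in software rather than being handed to a
 * hardware virtqueue object.
 */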
2808 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2810 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2811 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2814 print_status(mvdev, status, true);
2816 down_write(&ndev->reslock);
2818 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2819 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2820 err = setup_cvq_vring(mvdev);
2822 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2825 register_link_notifier(ndev);
2826 err = setup_driver(mvdev);
2828 mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2832 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2837 ndev->mvdev.status = status;
2838 up_write(&ndev->reslock);
2842 unregister_link_notifier(ndev);
2844 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2845 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2847 up_write(&ndev->reslock);
2850 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2854 /* By default, all groups are mapped to ASID 0 */
2855 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2856 mvdev->group2asid[i] = 0;
2859 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2861 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2862 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2864 print_status(mvdev, 0, true);
2865 mlx5_vdpa_info(mvdev, "performing device reset\n");
2867 down_write(&ndev->reslock);
2868 unregister_link_notifier(ndev);
2869 teardown_driver(ndev);
2870 clear_vqs_ready(ndev);
2871 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2872 ndev->mvdev.status = 0;
2873 ndev->mvdev.suspended = false;
2874 ndev->cur_num_vqs = 0;
2875 ndev->mvdev.cvq.received_desc = 0;
2876 ndev->mvdev.cvq.completed_desc = 0;
2877 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2878 ndev->mvdev.actual_features = 0;
2879 init_group_to_asid_map(mvdev);
2880 ++mvdev->generation;
2882 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2883 if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
2884 mlx5_vdpa_warn(mvdev, "create MR failed\n");
2886 up_write(&ndev->reslock);
2891 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2893 return sizeof(struct virtio_net_config);
2896 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2899 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2900 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2902 if (offset + len <= sizeof(struct virtio_net_config))
2903 memcpy(buf, (u8 *)&ndev->config + offset, len);
2906 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2912 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2914 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2916 return mvdev->generation;
2919 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2925 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
2927 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2932 err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
2937 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2938 struct vhost_iotlb *iotlb)
2940 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2941 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2944 down_write(&ndev->reslock);
2945 err = set_map_data(mvdev, iotlb, asid);
2946 up_write(&ndev->reslock);
2950 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2952 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2954 if (is_ctrl_vq_idx(mvdev, idx))
2957 return mvdev->vdev.dma_dev;
2960 static void free_irqs(struct mlx5_vdpa_net *ndev)
2962 struct mlx5_vdpa_irq_pool_entry *ent;
2965 if (!msix_mode_supported(&ndev->mvdev))
2968 if (!ndev->irqp.entries)
2971 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
2972 ent = ndev->irqp.entries + i;
2974 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
2976 kfree(ndev->irqp.entries);
2979 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2981 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2982 struct mlx5_core_dev *pfmdev;
2983 struct mlx5_vdpa_net *ndev;
2985 ndev = to_mlx5_vdpa_ndev(mvdev);
2987 free_resources(ndev);
2988 mlx5_vdpa_destroy_mr(mvdev);
2989 if (!is_zero_ether_addr(ndev->config.mac)) {
2990 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2991 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2993 mlx5_vdpa_free_resources(&ndev->mvdev);
2995 kfree(ndev->event_cbs);
2999 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3001 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3002 struct vdpa_notification_area ret = {};
3003 struct mlx5_vdpa_net *ndev;
3006 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3009 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
3010 * notification to avoid the risk of mapping pages that contain the BAR of more than one SF.
3013 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3016 ndev = to_mlx5_vdpa_ndev(mvdev);
3017 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3019 ret.size = PAGE_SIZE;
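/* The returned area lets a bus driver (e.g. vhost-vdpa) map the hardware
 * doorbell page directly into userspace, so kicks can bypass
 * mlx5_vdpa_kick_vq() entirely when direct notification is usable.
 */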
3023 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3025 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3026 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3027 struct mlx5_vdpa_virtqueue *mvq;
3029 if (!is_index_valid(mvdev, idx))
3032 if (is_ctrl_vq_idx(mvdev, idx))
3035 mvq = &ndev->vqs[idx];
3039 return mvq->map.virq;
3042 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3044 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3046 return mvdev->actual_features;
3049 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3050 u64 *received_desc, u64 *completed_desc)
3052 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3053 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3058 if (!counters_supported(&ndev->mvdev))
3061 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3064 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3066 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3067 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3068 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3069 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3071 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3075 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3076 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3077 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3081 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3082 struct sk_buff *msg,
3083 struct netlink_ext_ack *extack)
3085 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3086 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3087 struct mlx5_vdpa_virtqueue *mvq;
3088 struct mlx5_control_vq *cvq;
3093 down_read(&ndev->reslock);
3094 if (!is_index_valid(mvdev, idx)) {
3095 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3100 if (idx == ctrl_vq_idx(mvdev)) {
3102 received_desc = cvq->received_desc;
3103 completed_desc = cvq->completed_desc;
3107 mvq = &ndev->vqs[idx];
3108 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3110 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3116 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3119 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3123 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3126 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3132 up_read(&ndev->reslock);
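/* These per-queue counters surface through the vdpa netlink vendor-stats
 * interface; with the iproute2 vdpa tool something like the following is
 * expected to print received_desc/completed_desc for queue 1 (illustrative
 * command line, exact syntax depends on the tool version):
 *
 *	$ vdpa dev vstats show vdpa-a qidx 1
 */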
3136 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3138 struct mlx5_control_vq *cvq;
3140 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3147 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3149 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3150 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3151 struct mlx5_vdpa_virtqueue *mvq;
3154 mlx5_vdpa_info(mvdev, "suspending device\n");
3156 down_write(&ndev->reslock);
3157 unregister_link_notifier(ndev);
3158 for (i = 0; i < ndev->cur_num_vqs; i++) {
3159 mvq = &ndev->vqs[i];
3160 suspend_vq(ndev, mvq);
3162 mlx5_vdpa_cvq_suspend(mvdev);
3163 mvdev->suspended = true;
3164 up_write(&ndev->reslock);
3168 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3171 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3173 if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3176 mvdev->group2asid[group] = asid;
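/* The device exposes two virtqueue groups (data VQs and the CVQ); mapping
 * them to different ASIDs lets userspace give the CVQ its own address space,
 * which shadow control-VQ setups (e.g. live migration in QEMU) typically
 * rely on.
 */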
3180 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3181 .set_vq_address = mlx5_vdpa_set_vq_address,
3182 .set_vq_num = mlx5_vdpa_set_vq_num,
3183 .kick_vq = mlx5_vdpa_kick_vq,
3184 .set_vq_cb = mlx5_vdpa_set_vq_cb,
3185 .set_vq_ready = mlx5_vdpa_set_vq_ready,
3186 .get_vq_ready = mlx5_vdpa_get_vq_ready,
3187 .set_vq_state = mlx5_vdpa_set_vq_state,
3188 .get_vq_state = mlx5_vdpa_get_vq_state,
3189 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3190 .get_vq_notification = mlx5_get_vq_notification,
3191 .get_vq_irq = mlx5_get_vq_irq,
3192 .get_vq_align = mlx5_vdpa_get_vq_align,
3193 .get_vq_group = mlx5_vdpa_get_vq_group,
3194 .get_device_features = mlx5_vdpa_get_device_features,
3195 .set_driver_features = mlx5_vdpa_set_driver_features,
3196 .get_driver_features = mlx5_vdpa_get_driver_features,
3197 .set_config_cb = mlx5_vdpa_set_config_cb,
3198 .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3199 .get_device_id = mlx5_vdpa_get_device_id,
3200 .get_vendor_id = mlx5_vdpa_get_vendor_id,
3201 .get_status = mlx5_vdpa_get_status,
3202 .set_status = mlx5_vdpa_set_status,
3203 .reset = mlx5_vdpa_reset,
3204 .get_config_size = mlx5_vdpa_get_config_size,
3205 .get_config = mlx5_vdpa_get_config,
3206 .set_config = mlx5_vdpa_set_config,
3207 .get_generation = mlx5_vdpa_get_generation,
3208 .set_map = mlx5_vdpa_set_map,
3209 .set_group_asid = mlx5_set_group_asid,
3210 .get_vq_dma_dev = mlx5_get_vq_dma_dev,
3211 .free = mlx5_vdpa_free,
3212 .suspend = mlx5_vdpa_suspend,
3215 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3220 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3224 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3228 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3230 struct mlx5_vdpa_net_resources *res = &ndev->res;
3234 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3238 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3242 err = create_tis(ndev);
3251 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3255 static void free_resources(struct mlx5_vdpa_net *ndev)
3257 struct mlx5_vdpa_net_resources *res = &ndev->res;
3263 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3267 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3269 struct mlx5_vdpa_virtqueue *mvq;
3272 for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3273 mvq = &ndev->vqs[i];
3274 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3277 mvq->fwqp.fw = true;
3278 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3280 for (; i < ndev->mvdev.max_vqs; i++) {
3281 mvq = &ndev->vqs[i];
3282 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3288 struct mlx5_vdpa_mgmtdev {
3289 struct vdpa_mgmt_dev mgtdev;
3290 struct mlx5_adev *madev;
3291 struct mlx5_vdpa_net *ndev;
3294 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3296 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3300 in = kvzalloc(inlen, GFP_KERNEL);
3304 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3305 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3306 mtu + MLX5V_ETH_HARD_MTU);
3307 MLX5_SET(modify_nic_vport_context_in, in, opcode,
3308 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3310 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3316 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3318 struct mlx5_vdpa_irq_pool_entry *ent;
3321 if (!msix_mode_supported(&ndev->mvdev))
3324 if (!ndev->mvdev.mdev->pdev)
3327 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3328 if (!ndev->irqp.entries)
3332 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3333 ent = ndev->irqp.entries + i;
3334 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3335 dev_name(&ndev->mvdev.vdev.dev), i);
3336 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3340 ndev->irqp.num_ent++;
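/* mlx5_vdpa_dev_add() below is reached through the vdpa management API, for
 * example via the iproute2 vdpa tool (illustrative values; the device name
 * and PCI address are hypothetical):
 *
 *	$ vdpa mgmtdev show
 *	$ vdpa dev add name vdpa-a mgmtdev pci/0000:3b:00.2 \
 *		mac 00:11:22:33:44:55 max_vqp 4 mtu 9000
 */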
3344 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3345 const struct vdpa_dev_set_config *add_config)
3347 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3348 struct virtio_net_config *config;
3349 struct mlx5_core_dev *pfmdev;
3350 struct mlx5_vdpa_dev *mvdev;
3351 struct mlx5_vdpa_net *ndev;
3352 struct mlx5_core_dev *mdev;
3353 u64 device_features;
3361 mdev = mgtdev->madev->mdev;
3362 device_features = mgtdev->mgtdev.supported_features;
3363 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3364 if (add_config->device_features & ~device_features) {
3365 dev_warn(mdev->device,
3366 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3367 add_config->device_features, device_features);
3370 device_features &= add_config->device_features;
3372 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3374 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3375 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3376 dev_warn(mdev->device,
3377 "Must provision minimum features 0x%llx for this device",
3378 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3382 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3383 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3384 dev_warn(mdev->device, "missing support for split virtqueues\n");
3388 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3389 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3391 dev_warn(mdev->device,
3392 "%d virtqueues are supported. At least 2 are required\n",
3397 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3398 if (add_config->net.max_vq_pairs > max_vqs / 2)
3400 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3405 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3406 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3408 return PTR_ERR(ndev);
3410 ndev->mvdev.max_vqs = max_vqs;
3411 mvdev = &ndev->mvdev;
3414 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3415 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3416 if (!ndev->vqs || !ndev->event_cbs) {
3422 allocate_irqs(ndev);
3423 init_rwsem(&ndev->reslock);
3424 config = &ndev->config;
3426 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3427 err = config_func_mtu(mdev, add_config->net.mtu);
3432 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3433 err = query_mtu(mdev, &mtu);
3437 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3440 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3441 if (get_link_state(mvdev))
3442 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3444 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3447 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3448 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3449 /* Don't bother setting the mac address in config if we are not going to provision _F_MAC */
3450 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3451 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3452 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3457 if (!is_zero_ether_addr(config->mac)) {
3458 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3459 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3462 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3464 * We used to clear the _F_MAC feature bit when a zero mac
3465 * address was seen and device features were not explicitly
3466 * provisioned. Keep that behaviour so old scripts do not
3467 * break.
3469 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3470 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3471 /* Don't provision zero mac address for _F_MAC */
3472 mlx5_vdpa_warn(&ndev->mvdev,
3473 "No mac address provisioned?\n");
3478 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3479 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3481 ndev->mvdev.mlx_features = device_features;
3482 mvdev->vdev.dma_dev = &mdev->pdev->dev;
3483 err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3487 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3488 err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3493 err = alloc_resources(ndev);
3497 ndev->cvq_ent.mvdev = mvdev;
3498 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3499 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3505 mvdev->vdev.mdev = &mgtdev->mgtdev;
3506 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3510 mgtdev->ndev = ndev;
3514 destroy_workqueue(mvdev->wq);
3516 free_resources(ndev);
3518 mlx5_vdpa_destroy_mr(mvdev);
3520 mlx5_vdpa_free_resources(&ndev->mvdev);
3522 if (!is_zero_ether_addr(config->mac))
3523 mlx5_mpfs_del_mac(pfmdev, config->mac);
3525 put_device(&mvdev->vdev.dev);
3529 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3531 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3532 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3533 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3534 struct workqueue_struct *wq;
3536 unregister_link_notifier(ndev);
3537 _vdpa_unregister_device(dev);
3540 destroy_workqueue(wq);
3541 mgtdev->ndev = NULL;
3544 static const struct vdpa_mgmtdev_ops mdev_ops = {
3545 .dev_add = mlx5_vdpa_dev_add,
3546 .dev_del = mlx5_vdpa_dev_del,
3549 static struct virtio_device_id id_table[] = {
3550 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3554 static int mlx5v_probe(struct auxiliary_device *adev,
3555 const struct auxiliary_device_id *id)
3558 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3559 struct mlx5_core_dev *mdev = madev->mdev;
3560 struct mlx5_vdpa_mgmtdev *mgtdev;
3563 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3567 mgtdev->mgtdev.ops = &mdev_ops;
3568 mgtdev->mgtdev.device = mdev->device;
3569 mgtdev->mgtdev.id_table = id_table;
3570 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3571 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3572 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3573 BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3574 mgtdev->mgtdev.max_supported_vqs =
3575 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3576 mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3577 mgtdev->madev = madev;
3579 err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3583 auxiliary_set_drvdata(adev, mgtdev);
3592 static void mlx5v_remove(struct auxiliary_device *adev)
3594 struct mlx5_vdpa_mgmtdev *mgtdev;
3596 mgtdev = auxiliary_get_drvdata(adev);
3597 vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3601 static const struct auxiliary_device_id mlx5v_id_table[] = {
3602 { .name = MLX5_ADEV_NAME ".vnet", },
3606 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3608 static struct auxiliary_driver mlx5v_driver = {
3610 .probe = mlx5v_probe,
3611 .remove = mlx5v_remove,
3612 .id_table = mlx5v_id_table,
3615 module_auxiliary_driver(mlx5v_driver);