// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");
#define VALID_FEATURES_MASK \
        (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
         BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
         BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
         BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
         BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
         BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
         BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
         BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
         BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
         BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
         BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
         BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
         BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
        (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
         VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
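/*
 * Usage note (illustrative): MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)
 * evaluates to 1 when the driver negotiated VIRTIO_NET_F_MQ into
 * actual_features, and to 0 otherwise.
 */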
#define MLX5V_UNTAGGED 0x1000

struct mlx5_vdpa_cq_buf {
        struct mlx5_frag_buf_ctrl fbc;
        struct mlx5_frag_buf frag_buf;
        int cqe_size;
        int nent;
};

struct mlx5_vdpa_cq {
        struct mlx5_core_cq mcq;
        struct mlx5_vdpa_cq_buf buf;
        struct mlx5_db db;
        int cqe;
};

struct mlx5_vdpa_umem {
        struct mlx5_frag_buf_ctrl fbc;
        struct mlx5_frag_buf frag_buf;
        int size;
        u32 id;
};

struct mlx5_vdpa_qp {
        struct mlx5_core_qp mqp;
        struct mlx5_frag_buf frag_buf;
        struct mlx5_db db;
        u16 head;
        bool fw;
};

struct mlx5_vq_restore_info {
        /* ... */
};
struct mlx5_vdpa_virtqueue {
        /* ... */

        /* Resources for implementing the notification channel from the device
         * to the driver. fwqp is the firmware end of an RC connection; the
         * other end is vqqp, used by the driver. cq is where completions are
         * reported.
         */
        struct mlx5_vdpa_cq cq;
        struct mlx5_vdpa_qp fwqp;
        struct mlx5_vdpa_qp vqqp;

        /* umem resources are required for the virtqueue operation. Their use
         * is internal and they must be provided by the driver.
         */
        struct mlx5_vdpa_umem umem1;
        struct mlx5_vdpa_umem umem2;
        struct mlx5_vdpa_umem umem3;

        struct mlx5_vdpa_net *ndev;

        /* keep last in the struct */
        struct mlx5_vq_restore_info ri;
};
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
        if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
                if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
                        return idx < 2;
                else
                        return idx < 3;
        }

        return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)                                     \
        do {                                                            \
                if (features & BIT_ULL(_feature))                       \
                        mlx5_vdpa_info(mvdev, "%s\n", #_feature);       \
        } while (0)

#define MLX5_LOG_VIO_STAT(_status)                                      \
        do {                                                            \
                if (status & (_status))                                 \
                        mlx5_vdpa_info(mvdev, "%s\n", #_status);        \
        } while (0)
/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
        return virtio_legacy_is_little_endian() ||
                (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
        return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
        return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}
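/*
 * Illustrative note: with a legacy (pre-VERSION_1) device on a
 * big-endian host, virtio_legacy_is_little_endian() is false and the
 * helpers above byte-swap; once VIRTIO_F_VERSION_1 is negotiated the
 * ring format is always little-endian, so they are no-ops on
 * little-endian hosts.
 */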
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
        if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
                return 2;

        return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
        return idx == ctrl_vq_idx(mvdev);
}
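/*
 * Illustrative layout, assuming a device with max_vqs = 16 data queues:
 * with VIRTIO_NET_F_MQ negotiated the data virtqueues occupy indices
 * 0..15 and the control VQ sits at index 16 (max_vqs); without MQ only
 * indices 0 and 1 carry data and the control VQ is index 2.
 */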
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
        if (status & ~VALID_STATUS_MASK)
                mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
                               status & ~VALID_STATUS_MASK);

        if (!mlx5_vdpa_debug)
                return;

        mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
        if (set && !status) {
                mlx5_vdpa_info(mvdev, "driver resets the device\n");
                return;
        }

        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
        if (features & ~VALID_FEATURES_MASK)
                mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
                               features & ~VALID_FEATURES_MASK);

        if (!mlx5_vdpa_debug)
                return;

        mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
        if (!features)
                mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
static int create_tis(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
        u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
        void *tisc;
        int err;

        tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
        MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
        err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
        if (err)
                mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

        return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
        mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
        struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
        u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
        u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
        int err;

        err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
                                       ndev->mvdev.mdev->priv.numa_node);
        if (err)
                return err;

        mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

        buf->cqe_size = MLX5_VDPA_CQE_SIZE;
        buf->nent = nent;

        return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
        struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
                                        ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
        return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
        struct mlx5_cqe64 *cqe64;
        void *cqe;
        int i;

        for (i = 0; i < buf->nent; i++) {
                cqe = get_cqe(vcq, i);
                cqe64 = cqe;
                cqe64->op_own = MLX5_CQE_INVALID << 4;
        }
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
        struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

        if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
            !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
                return cqe64;

        return NULL;
}
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
        vqp->head += n;
        vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
                       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
        struct mlx5_vdpa_qp *vqp;
        __be64 *pas;
        void *qpc;

        vqp = fw ? &mvq->fwqp : &mvq->vqqp;
        MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        if (vqp->fw) {
                /* Firmware QP is allocated by the driver for the firmware's
                 * use so we can skip part of the params as they will be chosen by firmware
                 */
                qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
                MLX5_SET(qpc, qpc, no_sq, 1);
                return;
        }

        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
        MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
        MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET(qpc, qpc, no_sq, 1);
        MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);

        pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
        mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}
static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
                                        num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
                                        ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}
static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
                     struct mlx5_vdpa_qp *vqp)
{
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
        void *qpc;
        void *in;
        int err;

        if (!vqp->fw) {
                vqp = &mvq->vqqp;
                err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
                if (err)
                        return err;

                err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
                if (err)
                        goto err_db;
                inlen += vqp->frag_buf.npages * sizeof(__be64);
        }

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_kzalloc;
        }

        qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
        if (!vqp->fw)
                MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
        MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
        kfree(in);
        if (err)
                goto err_kzalloc;

        vqp->mqp.uid = ndev->mvdev.res.uid;
        vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

        if (!vqp->fw)
                rx_post(vqp, mvq->num_ent);

        return 0;

err_kzalloc:
        if (!vqp->fw)
                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
        if (!vqp->fw)
                rq_buf_free(ndev, vqp);

        return err;
}
static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
        u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

        MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
        MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
        MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
        if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
                mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
        if (!vqp->fw) {
                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
                rq_buf_free(ndev, vqp);
        }
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
        return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
        struct mlx5_cqe64 *cqe64;

        cqe64 = next_cqe_sw(vcq);
        if (!cqe64)
                return -EAGAIN;

        vcq->mcq.cons_index++;
        return 0;
}
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
        struct mlx5_vdpa_net *ndev = mvq->ndev;
        struct vdpa_callback *event_cb;

        event_cb = &ndev->event_cbs[mvq->index];
        mlx5_cq_set_ci(&mvq->cq.mcq);

        /* make sure the CQ consumer update is visible to the hardware before
         * updating the RX doorbell record.
         */
        dma_wmb();
        rx_post(&mvq->vqqp, num);
        if (event_cb->callback)
                event_cb->callback(event_cb->private);
}
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
        struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
        struct mlx5_vdpa_net *ndev = mvq->ndev;
        void __iomem *uar_page = ndev->mvdev.res.uar->map;
        int num = 0;

        while (!mlx5_vdpa_poll_one(&mvq->cq)) {
                num++;
                if (num > mvq->num_ent / 2) {
                        /* If completions keep coming while we poll, we want to
                         * let the hardware know that we consumed them by
                         * updating the doorbell record. We also let vdpa core
                         * know about this so it passes it on to the virtio
                         * driver.
                         */
                        mlx5_vdpa_handle_completions(mvq, num);
                        num = 0;
                }
        }

        if (num)
                mlx5_vdpa_handle_completions(mvq, num);

        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
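/*
 * Summary of the notification path implemented above: firmware posts on
 * its end of the RC connection (fwqp), the driver-owned vqqp receives,
 * a CQE lands on the virtqueue's CQ, and mlx5_vdpa_cq_comp() forwards
 * the event to the vdpa callback registered for that virtqueue index.
 */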
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        void __iomem *uar_page = ndev->mvdev.res.uar->map;
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        struct mlx5_vdpa_cq *vcq = &mvq->cq;
        __be64 *pas;
        int inlen;
        void *cqc;
        void *in;
        int err;
        int eqn;

        err = mlx5_db_alloc(mdev, &vcq->db);
        if (err)
                return err;

        vcq->mcq.set_ci_db = vcq->db.db;
        vcq->mcq.arm_db = vcq->db.db + 1;
        vcq->mcq.cqe_sz = 64;

        err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
        if (err)
                goto err_db;

        cq_frag_buf_init(vcq, &vcq->buf);

        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_vzalloc;
        }

        MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
        mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

        /* Use vector 0 by default. Consider adding code to choose least used
         * vector.
         */
        err = mlx5_comp_eqn_get(mdev, 0, &eqn);
        if (err)
                goto err_vec;

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
        MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
        MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

        err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
        if (err)
                goto err_vec;

        vcq->mcq.comp = mlx5_vdpa_cq_comp;
        vcq->cqe = num_ent;
        vcq->mcq.set_ci_db = vcq->db.db;
        vcq->mcq.arm_db = vcq->db.db + 1;
        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
        kfree(in);
        return 0;

err_vec:
        kfree(in);
err_vzalloc:
        cq_frag_buf_free(ndev, &vcq->buf);
err_db:
        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
        return err;
}
static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        struct mlx5_vdpa_cq *vcq = &mvq->cq;

        if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
                mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
                return;
        }
        cq_frag_buf_free(ndev, &vcq->buf);
        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
                          struct mlx5_vdpa_umem **umemp)
{
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        int p_a;
        int p_b;

        switch (num) {
        case 1:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
                *umemp = &mvq->umem1;
                break;
        case 2:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
                *umemp = &mvq->umem2;
                break;
        case 3:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
                *umemp = &mvq->umem3;
                break;
        }

        (*umemp)->size = p_a * mvq->num_ent + p_b;
}
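/*
 * Worked example with hypothetical capability values: if the device
 * reports umem_1_buffer_param_a = 128 and umem_1_buffer_param_b = 4096,
 * a virtqueue with num_ent = 256 needs a umem of
 * 128 * 256 + 4096 = 36864 bytes.
 */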
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}
static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
        u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
        struct mlx5_vdpa_umem *umem;
        __be64 *pas;
        int inlen;
        void *um;
        void *in;
        int err;

        set_umem_size(ndev, mvq, num, &umem);
        err = umem_frag_buf_alloc(ndev, umem, umem->size);
        if (err)
                return err;

        inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_in;
        }

        MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
        MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
        um = MLX5_ADDR_OF(create_umem_in, in, umem);
        MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

        pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
        mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        if (err) {
                mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
                goto err_cmd;
        }

        kfree(in);
        umem->id = MLX5_GET(create_umem_out, out, umem_id);

        return 0;

err_cmd:
        kfree(in);
err_in:
        umem_frag_buf_free(ndev, umem);
        return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
        u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
        u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
        struct mlx5_vdpa_umem *umem;

        switch (num) {
        case 1:
                umem = &mvq->umem1;
                break;
        case 2:
                umem = &mvq->umem2;
                break;
        case 3:
                umem = &mvq->umem3;
                break;
        }

        MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
        MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
                return;

        umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int num;
        int err;

        for (num = 1; num <= 3; num++) {
                err = create_umem(ndev, mvq, num);
                if (err)
                        goto err_umem;
        }
        return 0;

err_umem:
        for (num--; num > 0; num--)
                umem_destroy(ndev, mvq, num);

        return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int num;

        for (num = 3; num > 0; num--)
                umem_destroy(ndev, mvq, num);
}
static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
        u32 type_mask;

        type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

        /* prefer split queue */
        if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
                return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

        WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

        return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
        return idx % 2;
}

enum {
        MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
        MLX5_VIRTIO_NET_F_HOST_ECN = 4,
        MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
        MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
        MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
        MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
        MLX5_VIRTIO_NET_F_CSUM = 10,
        MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
        MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
};
static u16 get_features(u64 features)
{
        return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}
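/*
 * Illustrative example: a driver feature set containing only
 * VIRTIO_NET_F_CSUM and VIRTIO_NET_F_HOST_TSO4 maps to device-format
 * bits 10 and 12, so get_features() returns 0x1400.
 */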
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
        return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
               BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
{
        return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
               (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
               pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
}
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
        u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
        void *obj_context;
        u16 mlx_features;
        void *cmd_hdr;
        void *vq_ctx;
        void *in;
        int err;

        err = umems_create(ndev, mvq);
        if (err)
                return err;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_alloc;
        }

        mlx_features = get_features(ndev->mvdev.actual_features);
        cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
        MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
        MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
        MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
                 mlx_features >> 3);
        MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
                 mlx_features & 7);
        vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
        MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

        if (vq_is_tx(mvq->index))
                MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

        if (mvq->map.virq) {
                MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
                MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
        } else {
                MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
                MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
        }

        MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
        MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
        MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
                 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
        MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
        MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
        MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
        MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
        MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
        MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
        MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
        MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
        MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
        MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
        MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
        if (counters_supported(&ndev->mvdev))
                MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        if (err)
                goto err_cmd;

        mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
        kfree(in);
        mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

        return 0;

err_cmd:
        kfree(in);
err_alloc:
        umems_destroy(ndev, mvq);
        return err;
}
static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
        u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
                 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
                 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
                mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
                return;
        }
        mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
        umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
        return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
        return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}
static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
                        int *outlen, u32 qpn, u32 rqpn)
{
        void *qpc;
        void *pp;

        switch (cmd) {
        case MLX5_CMD_OP_2RST_QP:
                *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
                *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(*outlen, GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(qp_2rst_in, *in, opcode, cmd);
                MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(qp_2rst_in, *in, qpn, qpn);
                break;
        case MLX5_CMD_OP_RST2INIT_QP:
                *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
                MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
                MLX5_SET(qpc, qpc, rwe, 1);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, vhca_port_num, 1);
                break;
        case MLX5_CMD_OP_INIT2RTR_QP:
                *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
                MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
                MLX5_SET(qpc, qpc, log_msg_max, 30);
                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, fl, 1);
                break;
        case MLX5_CMD_OP_RTR2RTS_QP:
                *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
                MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, ack_timeout, 14);
                MLX5_SET(qpc, qpc, retry_count, 7);
                MLX5_SET(qpc, qpc, rnr_retry, 7);
                break;
        default:
                goto outerr_nullify;
        }

        return;

outerr:
        kfree(*in);
        kfree(*out);
outerr_nullify:
        *in = NULL;
        *out = NULL;
}

static void free_inout(void *in, void *out)
{
        kfree(in);
        kfree(out);
}
/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the QP in question is the one
 * used by firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
        int outlen;
        int inlen;
        void *out;
        void *in;
        int err;

        alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
        if (!in || !out)
                return -ENOMEM;

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
        free_inout(in, out);
        return err;
}
static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
        if (err)
                return err;

        return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}
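/*
 * Note on the sequence above: both ends of the RC connection walk the
 * standard RESET -> INIT -> RTR -> RTS ladder in lockstep, firmware QP
 * first at each step. Only the firmware QP is taken all the way to RTS;
 * the driver QP never sends (no_sq is set), so RTR is sufficient for it.
 */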
struct mlx5_virtq_attr {
        u8 state;
        u16 available_index;
        u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
                           struct mlx5_virtq_attr *attr)
{
        int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
        u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
        void *obj_context;
        void *cmd_hdr;
        void *out;
        int err;

        out = kzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
        if (err)
                goto err_cmd;

        obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
        memset(attr, 0, sizeof(*attr));
        attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
        attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
        attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
        kfree(out);
        return 0;

err_cmd:
        kfree(out);
        return err;
}

static bool is_valid_state_change(int oldstate, int newstate)
{
        switch (oldstate) {
        case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
                return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
        case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
                return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
        case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
        case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
        default:
                return false;
        }
}
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
        int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
        u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
        void *obj_context;
        void *cmd_hdr;
        void *in;
        int err;

        if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
                return 0;

        if (!is_valid_state_change(mvq->fw_state, state))
                return -EINVAL;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
        MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
                   MLX5_VIRTQ_MODIFY_MASK_STATE);
        MLX5_SET(virtio_net_q_object, obj_context, state, state);
        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        kfree(in);
        if (!err)
                mvq->fw_state = state;

        return err;
}
static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
        u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
        void *cmd_hdr;
        int err;

        if (!counters_supported(&ndev->mvdev))
                return 0;

        cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;

        mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

        return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
        u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

        if (!counters_supported(&ndev->mvdev))
                return;

        MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
        MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
        MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
        MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
                mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
{
        struct vdpa_callback *cb = priv;

        if (cb->callback)
                return cb->callback(cb->private);

        return IRQ_HANDLED;
}
static void alloc_vector(struct mlx5_vdpa_net *ndev,
                         struct mlx5_vdpa_virtqueue *mvq)
{
        struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
        struct mlx5_vdpa_irq_pool_entry *ent;
        int err;
        int i;

        for (i = 0; i < irqp->num_ent; i++) {
                ent = &irqp->entries[i];
                if (!ent->used) {
                        snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
                                 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
                        ent->dev_id = &ndev->event_cbs[mvq->index];
                        err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
                                          ent->name, ent->dev_id);
                        if (err)
                                return;

                        ent->used = true;
                        mvq->map = ent->map;
                        return;
                }
        }
}

static void dealloc_vector(struct mlx5_vdpa_net *ndev,
                           struct mlx5_vdpa_virtqueue *mvq)
{
        struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
        int i;

        for (i = 0; i < irqp->num_ent; i++)
                if (mvq->map.virq == irqp->entries[i].map.virq) {
                        free_irq(mvq->map.virq, irqp->entries[i].dev_id);
                        irqp->entries[i].used = false;
                        return;
                }
}
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u16 idx = mvq->index;
        int err;

        if (mvq->initialized)
                return 0;

        err = cq_create(ndev, idx, mvq->num_ent);
        if (err)
                return err;

        err = qp_create(ndev, mvq, &mvq->fwqp);
        if (err)
                goto err_fwqp;

        err = qp_create(ndev, mvq, &mvq->vqqp);
        if (err)
                goto err_vqqp;

        err = connect_qps(ndev, mvq);
        if (err)
                goto err_connect;

        err = counter_set_alloc(ndev, mvq);
        if (err)
                goto err_connect;

        alloc_vector(ndev, mvq);
        err = create_virtqueue(ndev, mvq);
        if (err)
                goto err_vq;

        if (mvq->ready) {
                err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
                if (err) {
                        mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
                                       idx, err);
                        goto err_modify;
                }
        }

        mvq->initialized = true;
        return 0;

err_modify:
        destroy_virtqueue(ndev, mvq);
err_vq:
        dealloc_vector(ndev, mvq);
        counter_set_dealloc(ndev, mvq);
err_connect:
        qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
        qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
        cq_destroy(ndev, idx);
        return err;
}
static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        struct mlx5_virtq_attr attr;

        if (!mvq->initialized)
                return;

        if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
                return;

        if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
                mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

        if (query_virtqueue(ndev, mvq, &attr)) {
                mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
                return;
        }
        mvq->avail_idx = attr.available_index;
        mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
        int i;

        for (i = 0; i < ndev->mvdev.max_vqs; i++)
                suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        if (!mvq->initialized)
                return;

        suspend_vq(ndev, mvq);
        destroy_virtqueue(ndev, mvq);
        dealloc_vector(ndev, mvq);
        counter_set_dealloc(ndev, mvq);
        qp_destroy(ndev, &mvq->vqqp);
        qp_destroy(ndev, &mvq->fwqp);
        cq_destroy(ndev, mvq->index);
        mvq->initialized = false;
}
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
        int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
        int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
        __be32 *list;
        void *rqtc;
        int inlen;
        void *in;
        int i, j;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
        MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
        for (i = 0, j = 0; i < act_sz; i++, j += 2)
                list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
        kfree(in);
        if (err)
                return err;

        return 0;
}
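/*
 * Illustrative example: with cur_num_vqs = 8 (four RX/TX pairs),
 * act_sz = 4 and the list above holds the virtq ids of the receive
 * queues vqs[0], vqs[2], vqs[4] and vqs[6]; TX queues (odd indices)
 * never enter the RQT.
 */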
#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
        int act_sz = roundup_pow_of_two(num / 2);
        __be32 *list;
        void *rqtc;
        int inlen;
        void *in;
        int i, j;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
        MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
        rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
        for (i = 0, j = 0; i < act_sz; i++, j += 2)
                list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
        err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
        kfree(in);
        if (err)
                return err;

        return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
        mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
        (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
         MLX5_HASH_FIELD_SEL_L4_DPORT)
        static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
                                                   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
                                                   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
                                                   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
                                                   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
        void *rss_key;
        void *outer;
        void *tirc;
        void *in;
        int err;

        in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

        MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
        MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
        rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
        memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

        outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
        MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
        MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
        MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

        MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
        MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

        err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
        kfree(in);
        if (err)
                return err;

        mlx5_vdpa_add_tirn(ndev);
        return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
        mlx5_vdpa_remove_tirn(ndev);
        mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
#define NUM_DESTS 2
#else
#define NUM_DESTS 1
#endif

static int add_steering_counters(struct mlx5_vdpa_net *ndev,
                                 struct macvlan_node *node,
                                 struct mlx5_flow_act *flow_act,
                                 struct mlx5_flow_destination *dests)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
        int err;

        node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
        if (IS_ERR(node->ucast_counter.counter))
                return PTR_ERR(node->ucast_counter.counter);

        node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
        if (IS_ERR(node->mcast_counter.counter)) {
                err = PTR_ERR(node->mcast_counter.counter);
                goto err_mcast_counter;
        }

        dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
        flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
        return 0;

err_mcast_counter:
        mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
        return err;
#else
        return 0;
#endif
}

static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
                                     struct macvlan_node *node)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
        mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
        mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
#endif
}
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
                                        struct macvlan_node *node)
{
        struct mlx5_flow_destination dests[NUM_DESTS] = {};
        struct mlx5_flow_act flow_act = {};
        struct mlx5_flow_spec *spec;
        void *headers_c;
        void *headers_v;
        u8 *dmac_c;
        u8 *dmac_v;
        int err;
        u16 vid;

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        vid = key2vid(node->macvlan);
        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
        dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
        dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
        eth_broadcast_addr(dmac_c);
        ether_addr_copy(dmac_v, mac);
        if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
        }
        if (node->tagged) {
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
        }
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
        dests[0].tir_num = ndev->res.tirn;
        err = add_steering_counters(ndev, node, &flow_act, dests);
        if (err)
                goto out_free;

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
        dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
#endif
        node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
        if (IS_ERR(node->ucast_rule)) {
                err = PTR_ERR(node->ucast_rule);
                goto err_ucast;
        }

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
        dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
#endif

        memset(dmac_c, 0, ETH_ALEN);
        memset(dmac_v, 0, ETH_ALEN);
        dmac_c[0] = 1;
        dmac_v[0] = 1;
        node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
        if (IS_ERR(node->mcast_rule)) {
                err = PTR_ERR(node->mcast_rule);
                goto err_mcast;
        }
        kvfree(spec);
        mlx5_vdpa_add_rx_counters(ndev, node);
        return 0;

err_mcast:
        mlx5_del_flow_rules(node->ucast_rule);
err_ucast:
        remove_steering_counters(ndev, node);
out_free:
        kvfree(spec);
        return err;
}
static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
                                         struct macvlan_node *node)
{
        mlx5_vdpa_remove_rx_counters(ndev, node);
        mlx5_del_flow_rules(node->ucast_rule);
        mlx5_del_flow_rules(node->mcast_rule);
}
static u64 search_val(u8 *mac, u16 vlan, bool tagged)
{
        u64 val;

        if (!tagged)
                vlan = MLX5V_UNTAGGED;

        val = (u64)vlan << 48 |
              (u64)mac[0] << 40 |
              (u64)mac[1] << 32 |
              (u64)mac[2] << 24 |
              (u64)mac[3] << 16 |
              (u64)mac[4] << 8 |
              (u64)mac[5];

        return val;
}
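/*
 * Key layout note: bits 63:48 hold the VLAN id, or the out-of-range
 * marker MLX5V_UNTAGGED (0x1000) for untagged traffic since valid VLAN
 * ids stay below it, and bits 47:0 hold the MAC address, so every
 * MAC/VLAN pair maps to a distinct 64-bit key.
 */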
static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
{
        struct macvlan_node *pos;
        u32 idx;

        idx = hash_64(value, 8); // tbd 8
        hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
                if (pos->macvlan == value)
                        return pos;
        }
        return NULL;
}

static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
        struct macvlan_node *ptr;
        u64 val;
        u32 idx;
        int err;

        val = search_val(mac, vid, tagged);
        if (mac_vlan_lookup(ndev, val))
                return -EEXIST;

        ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
        if (!ptr)
                return -ENOMEM;

        ptr->tagged = tagged;
        ptr->macvlan = val;
        ptr->ndev = ndev;
        err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
        if (err)
                goto err_add;

        idx = hash_64(val, 8);
        hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
        return 0;

err_add:
        kfree(ptr);
        return err;
}

static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
{
        struct macvlan_node *ptr;

        ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
        if (!ptr)
                return;

        hlist_del(&ptr->hlist);
        mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
        remove_steering_counters(ndev, ptr);
        kfree(ptr);
}

static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
{
        struct macvlan_node *pos;
        struct hlist_node *n;
        int i;

        for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
                hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
                        hlist_del(&pos->hlist);
                        mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
                        remove_steering_counters(ndev, pos);
                        kfree(pos);
                }
        }
}
static int setup_steering(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_flow_table_attr ft_attr = {};
        struct mlx5_flow_namespace *ns;
        int err;

        ft_attr.max_fte = MAX_STEERING_ENT;
        ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;

        ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
        if (!ns) {
                mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
                return -EOPNOTSUPP;
        }

        ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
        if (IS_ERR(ndev->rxft)) {
                mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
                return PTR_ERR(ndev->rxft);
        }
        mlx5_vdpa_add_rx_flow_table(ndev);

        err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
        if (err)
                goto err_add;

        return 0;

err_add:
        mlx5_vdpa_remove_rx_flow_table(ndev);
        mlx5_destroy_flow_table(ndev->rxft);
        return err;
}

static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
        clear_mac_vlan_table(ndev);
        mlx5_vdpa_remove_rx_flow_table(ndev);
        mlx5_destroy_flow_table(ndev->rxft);
}
static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_control_vq *cvq = &mvdev->cvq;
        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
        struct mlx5_core_dev *pfmdev;
        size_t read;
        u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

        pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
        switch (cmd) {
        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
                if (read != ETH_ALEN)
                        break;

                if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
                        status = VIRTIO_NET_OK;
                        break;
                }

                if (is_zero_ether_addr(mac))
                        break;

                if (!is_zero_ether_addr(ndev->config.mac)) {
                        if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
                                mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
                                               ndev->config.mac);
                                break;
                        }
                }

                if (mlx5_mpfs_add_mac(pfmdev, mac)) {
                        mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
                                       mac);
                        break;
                }

                /* Back up the original MAC address so that if we fail to add
                 * the forward rules we can restore it.
                 */
                memcpy(mac_back, ndev->config.mac, ETH_ALEN);

                memcpy(ndev->config.mac, mac, ETH_ALEN);

                /* The flow table entry needs to be recreated so that packets
                 * addressed to the new MAC are forwarded back to us.
                 */
                mac_vlan_del(ndev, mac_back, 0, false);

                if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
                        mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

                        /* This path is hardly ever taken, but double check anyway. */
                        if (is_zero_ether_addr(mac_back)) {
                                mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
                                break;
                        }

                        /* Try to restore the original MAC address to the MPFS
                         * table, and try to restore the forward rule entry.
                         */
                        if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
                                mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
                                               ndev->config.mac);
                        }

                        if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
                                mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
                                               mac_back);
                        }

                        memcpy(ndev->config.mac, mac_back, ETH_ALEN);

                        if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
                                mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");

                        break;
                }

                status = VIRTIO_NET_OK;
                break;

        default:
                break;
        }

        return status;
}
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
{
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        int cur_qps = ndev->cur_num_vqs / 2;
        int err;
        int i;

        if (cur_qps > newqps) {
                err = modify_rqt(ndev, 2 * newqps);
                if (err)
                        return err;

                for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
                        teardown_vq(ndev, &ndev->vqs[i]);

                ndev->cur_num_vqs = 2 * newqps;
        } else {
                ndev->cur_num_vqs = 2 * newqps;
                for (i = cur_qps * 2; i < 2 * newqps; i++) {
                        err = setup_vq(ndev, &ndev->vqs[i]);
                        if (err)
                                goto clean_added;
                }
                err = modify_rqt(ndev, 2 * newqps);
                if (err)
                        goto clean_added;
        }
        return 0;

clean_added:
        for (--i; i >= 2 * cur_qps; --i)
                teardown_vq(ndev, &ndev->vqs[i]);

        ndev->cur_num_vqs = 2 * cur_qps;

        return err;
}
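/*
 * Note on the arithmetic above: data virtqueues come in RX/TX pairs, so
 * a queue-pair count of N corresponds to cur_num_vqs = 2 * N. When
 * shrinking, the RQT is narrowed before the tail queues are torn down;
 * when growing, the new queues are set up before the RQT is widened.
 */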
static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
        struct mlx5_control_vq *cvq = &mvdev->cvq;
        struct virtio_net_ctrl_mq mq;
        size_t read;
        u16 newqps;

        switch (cmd) {
        case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
                /* This mq feature check aligns with pre-existing userspace
                 * implementation.
                 *
                 * Without it, an untrusted driver could fake a multiqueue config
                 * request down to a non-mq device that may cause the kernel to
                 * panic due to uninitialized resources for extra vqs. Even with
                 * a well behaving guest driver, it is not expected to allow
                 * changing the number of vqs on a non-mq device.
                 */
                if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
                        break;

                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
                if (read != sizeof(mq))
                        break;

                newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
                if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
                    newqps > ndev->rqt_size)
                        break;

                if (ndev->cur_num_vqs == 2 * newqps) {
                        status = VIRTIO_NET_OK;
                        break;
                }

                if (!change_num_qps(mvdev, newqps))
                        status = VIRTIO_NET_OK;

                break;
        default:
                break;
        }

        return status;
}
static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
        struct mlx5_control_vq *cvq = &mvdev->cvq;
        __virtio16 vlan;
        size_t read;
        u16 id;

        if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
                return status;

        switch (cmd) {
        case VIRTIO_NET_CTRL_VLAN_ADD:
                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
                if (read != sizeof(vlan))
                        break;

                id = mlx5vdpa16_to_cpu(mvdev, vlan);
                if (mac_vlan_add(ndev, ndev->config.mac, id, true))
                        break;

                status = VIRTIO_NET_OK;
                break;
        case VIRTIO_NET_CTRL_VLAN_DEL:
                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
                if (read != sizeof(vlan))
                        break;

                id = mlx5vdpa16_to_cpu(mvdev, vlan);
                mac_vlan_del(ndev, ndev->config.mac, id, true);
                status = VIRTIO_NET_OK;
                break;
        default:
                break;
        }

        return status;
}
static void mlx5_cvq_kick_handler(struct work_struct *work)
{
        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
        struct virtio_net_ctrl_hdr ctrl;
        struct mlx5_vdpa_wq_ent *wqent;
        struct mlx5_vdpa_dev *mvdev;
        struct mlx5_control_vq *cvq;
        struct mlx5_vdpa_net *ndev;
        size_t read, write;
        int err;

        wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
        mvdev = wqent->mvdev;
        ndev = to_mlx5_vdpa_ndev(mvdev);
        cvq = &mvdev->cvq;

        down_write(&ndev->reslock);

        if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
                goto out;

        if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
                goto out;

        if (!cvq->ready)
                goto out;

        while (true) {
                err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
                                           GFP_ATOMIC);
                if (err <= 0)
                        break;

                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
                if (read != sizeof(ctrl))
                        break;

                cvq->received_desc++;
                switch (ctrl.class) {
                case VIRTIO_NET_CTRL_MAC:
                        status = handle_ctrl_mac(mvdev, ctrl.cmd);
                        break;
                case VIRTIO_NET_CTRL_MQ:
                        status = handle_ctrl_mq(mvdev, ctrl.cmd);
                        break;
                case VIRTIO_NET_CTRL_VLAN:
                        status = handle_ctrl_vlan(mvdev, ctrl.cmd);
                        break;
                default:
                        break;
                }

                /* Make sure data is written before advancing index */
                smp_wmb();

                write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
                vringh_complete_iotlb(&cvq->vring, cvq->head, write);
                vringh_kiov_cleanup(&cvq->riov);
                vringh_kiov_cleanup(&cvq->wiov);

                if (vringh_need_notify_iotlb(&cvq->vring))
                        vringh_notify(&cvq->vring);

                cvq->completed_desc++;
                queue_work(mvdev->wq, &wqent->work);
                break;
        }

out:
        up_write(&ndev->reslock);
}
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;

        if (!is_index_valid(mvdev, idx))
                return;

        if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
                if (!mvdev->wq || !mvdev->cvq.ready)
                        return;

                queue_work(mvdev->wq, &ndev->cvq_ent.work);
                return;
        }

        mvq = &ndev->vqs[idx];
        if (unlikely(!mvq->ready))
                return;

        iowrite16(idx, ndev->mvdev.res.kick_addr);
}

static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
                                    u64 driver_area, u64 device_area)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;

        if (!is_index_valid(mvdev, idx))
                return -EINVAL;

        if (is_ctrl_vq_idx(mvdev, idx)) {
                mvdev->cvq.desc_addr = desc_area;
                mvdev->cvq.device_addr = device_area;
                mvdev->cvq.driver_addr = driver_area;
                return 0;
        }

        mvq = &ndev->vqs[idx];
        mvq->desc_addr = desc_area;
        mvq->device_addr = device_area;
        mvq->driver_addr = driver_area;
        return 0;
}
static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;

        if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
                return;

        mvq = &ndev->vqs[idx];
        mvq->num_ent = num;
}

static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

        ndev->event_cbs[idx] = *cb;
        if (is_ctrl_vq_idx(mvdev, idx))
                mvdev->cvq.event_cb = *cb;
}

static void mlx5_cvq_notify(struct vringh *vring)
{
        struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);

        if (!cvq->event_cb.callback)
                return;

        cvq->event_cb.callback(cvq->event_cb.private);
}
static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
{
        struct mlx5_control_vq *cvq = &mvdev->cvq;

        cvq->ready = ready;
        if (!ready)
                return;

        cvq->vring.notify = mlx5_cvq_notify;
}

static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;
        int err;

        if (!mvdev->actual_features)
                return;

        if (!is_index_valid(mvdev, idx))
                return;

        if (is_ctrl_vq_idx(mvdev, idx)) {
                set_cvq_ready(mvdev, ready);
                return;
        }

        mvq = &ndev->vqs[idx];
        if (!ready) {
                suspend_vq(ndev, mvq);
        } else {
                err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
                if (err) {
                        mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
                        ready = false;
                }
        }

        mvq->ready = ready;
}
static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

        if (!is_index_valid(mvdev, idx))
                return false;

        if (is_ctrl_vq_idx(mvdev, idx))
                return mvdev->cvq.ready;

        return ndev->vqs[idx].ready;
}

static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
                                  const struct vdpa_vq_state *state)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;

        if (!is_index_valid(mvdev, idx))
                return -EINVAL;

        if (is_ctrl_vq_idx(mvdev, idx)) {
                mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
                return 0;
        }

        mvq = &ndev->vqs[idx];
        if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
                mlx5_vdpa_warn(mvdev, "can't modify available index\n");
                return -EINVAL;
        }

        mvq->used_idx = state->split.avail_index;
        mvq->avail_idx = state->split.avail_index;
        return 0;
}
static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;
        struct mlx5_virtq_attr attr;
        int err;

        if (!is_index_valid(mvdev, idx))
                return -EINVAL;

        if (is_ctrl_vq_idx(mvdev, idx)) {
                state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
                return 0;
        }

        mvq = &ndev->vqs[idx];
        /* If the virtq object was destroyed, use the value saved at
         * the last minute of suspend_vq. This caters for userspace
         * that cares about emulating the index after vq is stopped.
         */
        if (!mvq->initialized) {
                /* Firmware returns a wrong value for the available index.
                 * Since both values should be identical, we take the value of
                 * used_idx which is reported correctly.
                 */
                state->split.avail_index = mvq->used_idx;
                return 0;
        }

        err = query_virtqueue(ndev, mvq, &attr);
        if (err) {
                mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
                return err;
        }
        state->split.avail_index = attr.used_index;
        return 0;
}
static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
        return PAGE_SIZE;
}

static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
{
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

        if (is_ctrl_vq_idx(mvdev, idx))
                return MLX5_VDPA_CVQ_GROUP;

        return MLX5_VDPA_DATAVQ_GROUP;
}
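/*
 * Design note (assumed rationale): reporting the control VQ in its own
 * group lets userspace bind it to a different address space than the
 * data virtqueues, e.g. keeping CVQ buffers in host memory while the
 * data rings are mapped to guest memory.
 */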
2264 static u64 mlx_to_vritio_features(u16 dev_features)
2268 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2269 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2270 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2271 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2272 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2273 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2274 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2275 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2276 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2277 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2278 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2279 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2280 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2281 result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2282 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2283 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2284 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2285 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2287 return result;
2290 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2292 u64 mlx_vdpa_features = 0;
2293 u16 dev_features;
2295 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2296 mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2297 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2298 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2299 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2300 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2301 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2302 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2303 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2304 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2305 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2306 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2308 return mlx_vdpa_features;
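/* Only the offload bits are gated on the device capability mask; the
 * remaining bits are always advertised because they are provided by this
 * driver itself (the CVQ is emulated in software) rather than by a
 * firmware capability. For example, a device whose capability mask sets
 * only MLX5_VIRTIO_NET_F_CSUM would still advertise VIRTIO_NET_F_CTRL_VQ,
 * VIRTIO_NET_F_MQ, VIRTIO_NET_F_STATUS and the other fixed bits above.
 */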
2311 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2313 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2314 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2316 print_features(mvdev, ndev->mvdev.mlx_features, false);
2317 return ndev->mvdev.mlx_features;
2320 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2322 /* Minimum features to expect */
2323 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2324 return -EOPNOTSUPP;
2326 /* Double check the feature combination sent down by the driver.
2327 * Fail invalid features due to the absence of a feature they depend on.
2329 * Per the VIRTIO v1.1 specification, section 5.1.3.1 "Feature bit
2330 * requirements": "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2331 * By failing invalid features sent down by untrusted drivers,
2332 * we ensure the assumptions made by is_index_valid() and
2333 * is_ctrl_vq_idx() are not violated.
2334 */
2335 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2336 BIT_ULL(VIRTIO_NET_F_MQ))
2337 return -EINVAL;
2339 return 0;
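/* Example: a driver acking BIT_ULL(VIRTIO_NET_F_MQ) without
 * BIT_ULL(VIRTIO_NET_F_CTRL_VQ) is rejected with -EINVAL here; without a
 * control VQ there is no way to enable extra queue pairs, and the index
 * math in is_index_valid() and is_ctrl_vq_idx() relies on this invariant.
 */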
2342 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2344 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2345 int err;
2346 int i;
2348 for (i = 0; i < mvdev->max_vqs; i++) {
2349 err = setup_vq(ndev, &ndev->vqs[i]);
2350 if (err)
2351 goto err_vq;
2354 return 0;
2356 err_vq:
2357 for (--i; i >= 0; i--)
2358 teardown_vq(ndev, &ndev->vqs[i]);
2360 return err;
2363 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2365 struct mlx5_vdpa_virtqueue *mvq;
2368 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2369 mvq = &ndev->vqs[i];
2370 if (!mvq->initialized)
2371 continue;
2373 teardown_vq(ndev, mvq);
2377 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2379 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2380 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2381 /* MQ supported. CVQ index is right above the last data virtqueue's */
2382 mvdev->max_idx = mvdev->max_vqs;
2383 } else {
2384 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2385 * the CVQ gets index 2.
2386 */
2387 mvdev->max_idx = 2;
2388 }
2389 } else {
2390 /* Two data virtqueues only: one for rx and one for tx */
2391 mvdev->max_idx = 1;
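/* Resulting virtqueue index layout, for example with max_vqs = 16:
 *   CTRL_VQ + MQ:  data VQs 0..15, CVQ at index 16 (max_idx = 16)
 *   CTRL_VQ only:  data VQs 0 and 1, CVQ at index 2 (max_idx = 2)
 *   neither:       data VQs 0 and 1 only (max_idx = 1)
 */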
2395 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2397 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2398 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2399 int err;
2401 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2402 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2403 MLX5_SET(query_vport_state_in, in, vport_number, vport);
2404 if (vport)
2405 MLX5_SET(query_vport_state_in, in, other_vport, 1);
2407 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2408 if (err)
2409 return 0;
2411 return MLX5_GET(query_vport_state_out, out, state);
2414 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2416 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2417 VPORT_STATE_UP)
2418 return true;
2420 return false;
2423 static void update_carrier(struct work_struct *work)
2425 struct mlx5_vdpa_wq_ent *wqent;
2426 struct mlx5_vdpa_dev *mvdev;
2427 struct mlx5_vdpa_net *ndev;
2429 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2430 mvdev = wqent->mvdev;
2431 ndev = to_mlx5_vdpa_ndev(mvdev);
2432 if (get_link_state(mvdev))
2433 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2434 else
2435 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2437 if (ndev->config_cb.callback)
2438 ndev->config_cb.callback(ndev->config_cb.private);
2440 kfree(wqent);
2443 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2445 struct mlx5_vdpa_wq_ent *wqent;
2447 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2448 if (!wqent)
2449 return -ENOMEM;
2451 wqent->mvdev = &ndev->mvdev;
2452 INIT_WORK(&wqent->work, update_carrier);
2453 queue_work(ndev->mvdev.wq, &wqent->work);
2454 return 0;
2457 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2459 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2460 struct mlx5_eqe *eqe = param;
2461 int ret = NOTIFY_DONE;
2463 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2464 switch (eqe->sub_type) {
2465 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2466 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2467 if (queue_link_work(ndev))
2468 return NOTIFY_DONE;
2470 ret = NOTIFY_OK;
2471 break;
2472 default:
2473 break;
2474 }
2475 return ret;
2476 }
2477 return ret;
2480 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2482 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2483 return;
2485 ndev->nb.notifier_call = event_handler;
2486 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2487 ndev->nb_registered = true;
2488 queue_link_work(ndev);
2491 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2493 if (!ndev->nb_registered)
2494 return;
2496 ndev->nb_registered = false;
2497 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2499 flush_workqueue(ndev->mvdev.wq);
2502 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2504 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2505 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2506 int err;
2508 print_features(mvdev, features, true);
2510 err = verify_driver_features(mvdev, features);
2511 if (err)
2512 return err;
2514 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2515 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2516 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2517 else
2518 ndev->rqt_size = 1;
2520 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2521 * 5.1.6.5.5 "Device operation in multiqueue mode":
2523 * Multiqueue is disabled by default.
2524 * The driver enables multiqueue by sending a command using class
2525 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2526 * operation, as follows: ...
2528 ndev->cur_num_vqs = 2;
2530 update_cvq_info(mvdev);
2532 return err;
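/* rqt_size records the maximum number of queue pairs the driver may
 * enable later, while cur_num_vqs is what is active right now: per the
 * spec excerpt above, the device keeps a single queue pair until a
 * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command arrives on the control VQ.
 */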
2534 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2536 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2537 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2539 ndev->config_cb = *cb;
2542 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2543 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2545 return MLX5_VDPA_MAX_VQ_ENTRIES;
2548 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2550 return VIRTIO_ID_NET;
2553 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2555 return PCI_VENDOR_ID_MELLANOX;
2558 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2560 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2561 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2563 print_status(mvdev, ndev->mvdev.status, false);
2564 return ndev->mvdev.status;
2567 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2569 struct mlx5_vq_restore_info *ri = &mvq->ri;
2570 struct mlx5_virtq_attr attr = {};
2571 int err;
2573 if (mvq->initialized) {
2574 err = query_virtqueue(ndev, mvq, &attr);
2575 if (err)
2576 return err;
2579 ri->avail_index = attr.available_index;
2580 ri->used_index = attr.used_index;
2581 ri->ready = mvq->ready;
2582 ri->num_ent = mvq->num_ent;
2583 ri->desc_addr = mvq->desc_addr;
2584 ri->device_addr = mvq->device_addr;
2585 ri->driver_addr = mvq->driver_addr;
2586 ri->restore = true;
2587 return 0;
2591 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2595 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2596 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2597 save_channel_info(ndev, &ndev->vqs[i]);
2599 return 0;
2602 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2606 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2607 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2610 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2612 struct mlx5_vdpa_virtqueue *mvq;
2613 struct mlx5_vq_restore_info *ri;
2616 mlx5_clear_vqs(ndev);
2618 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2619 mvq = &ndev->vqs[i];
2620 ri = &mvq->ri;
2621 if (!ri->restore)
2622 continue;
2624 mvq->avail_idx = ri->avail_index;
2625 mvq->used_idx = ri->used_index;
2626 mvq->ready = ri->ready;
2627 mvq->num_ent = ri->num_ent;
2628 mvq->desc_addr = ri->desc_addr;
2629 mvq->device_addr = ri->device_addr;
2630 mvq->driver_addr = ri->driver_addr;
2635 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2636 struct vhost_iotlb *iotlb, unsigned int asid)
2638 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2639 int err;
2642 err = save_channels_info(ndev);
2643 if (err)
2644 goto err_mr;
2646 teardown_driver(ndev);
2647 mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2648 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
2649 if (err)
2650 goto err_mr;
2652 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2653 goto err_mr;
2655 restore_channels_info(ndev);
2656 err = setup_driver(mvdev);
2657 if (err)
2658 goto err_setup;
2660 return 0;
2662 err_setup:
2663 mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2664 err_mr:
2665 return err;
2668 /* reslock must be held for this function */
2669 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2671 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2672 int err;
2674 WARN_ON(!rwsem_is_locked(&ndev->reslock));
2677 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2681 mlx5_vdpa_add_debugfs(ndev);
2682 err = setup_virtqueues(mvdev);
2684 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2688 err = create_rqt(ndev);
2690 mlx5_vdpa_warn(mvdev, "create_rqt\n");
2694 err = create_tir(ndev);
2696 mlx5_vdpa_warn(mvdev, "create_tir\n");
2700 err = setup_steering(ndev);
2702 mlx5_vdpa_warn(mvdev, "setup_steering\n");
2714 teardown_virtqueues(ndev);
2716 mlx5_vdpa_remove_debugfs(ndev->debugfs);
2721 /* reslock must be held for this function */
2722 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2725 WARN_ON(!rwsem_is_locked(&ndev->reslock));
2727 if (!ndev->setup)
2728 return;
2730 mlx5_vdpa_remove_debugfs(ndev->debugfs);
2731 ndev->debugfs = NULL;
2732 teardown_steering(ndev);
2733 destroy_tir(ndev);
2734 destroy_rqt(ndev);
2735 teardown_virtqueues(ndev);
2736 ndev->setup = false;
2739 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2743 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2744 ndev->vqs[i].ready = false;
2746 ndev->mvdev.cvq.ready = false;
2749 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2751 struct mlx5_control_vq *cvq = &mvdev->cvq;
2752 int err = 0;
2754 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2755 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2756 MLX5_CVQ_MAX_ENT, false,
2757 (struct vring_desc *)(uintptr_t)cvq->desc_addr,
2758 (struct vring_avail *)(uintptr_t)cvq->driver_addr,
2759 (struct vring_used *)(uintptr_t)cvq->device_addr);
2761 return err;
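/* The CVQ ring is never handed to the NIC: vringh accesses the descriptor,
 * available and used rings through the IOTLB, so guest addresses are
 * translated in software. The ring addresses used here are the ones
 * previously programmed through the set_vq_address() op.
 */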
2764 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2766 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2767 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2768 int err;
2770 print_status(mvdev, status, true);
2772 down_write(&ndev->reslock);
2774 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2775 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2776 err = setup_cvq_vring(mvdev);
2777 if (err) {
2778 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2779 goto err_setup;
2780 }
2781 register_link_notifier(ndev);
2782 err = setup_driver(mvdev);
2783 if (err) {
2784 mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2785 goto err_driver;
2786 }
2787 } else {
2788 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2789 goto err_clear;
2793 ndev->mvdev.status = status;
2794 up_write(&ndev->reslock);
2795 return;
2797 err_driver:
2798 unregister_link_notifier(ndev);
2799 err_setup:
2800 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2801 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2802 err_clear:
2803 up_write(&ndev->reslock);
2806 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2810 /* By default, all virtqueue groups are mapped to ASID 0 */
2811 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2812 mvdev->group2asid[i] = 0;
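/* Placing the CVQ in its own group (MLX5_VDPA_CVQ_GROUP, see
 * mlx5_vdpa_get_vq_group() above) is what lets userspace later call
 * set_group_asid() and give the control VQ an address space separate from
 * the data path, e.g. to interpose a shadow CVQ during live migration.
 */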
2815 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2817 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2818 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2820 print_status(mvdev, 0, true);
2821 mlx5_vdpa_info(mvdev, "performing device reset\n");
2823 down_write(&ndev->reslock);
2824 unregister_link_notifier(ndev);
2825 teardown_driver(ndev);
2826 clear_vqs_ready(ndev);
2827 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2828 ndev->mvdev.status = 0;
2829 ndev->mvdev.suspended = false;
2830 ndev->cur_num_vqs = 0;
2831 ndev->mvdev.cvq.received_desc = 0;
2832 ndev->mvdev.cvq.completed_desc = 0;
2833 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2834 ndev->mvdev.actual_features = 0;
2835 init_group_to_asid_map(mvdev);
2836 ++mvdev->generation;
2838 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2839 if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
2840 mlx5_vdpa_warn(mvdev, "create MR failed\n");
2842 up_write(&ndev->reslock);
2844 return 0;
2847 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2849 return sizeof(struct virtio_net_config);
2852 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2853 unsigned int len)
2855 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2856 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2858 if (offset + len <= sizeof(struct virtio_net_config))
2859 memcpy(buf, (u8 *)&ndev->config + offset, len);
2862 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2863 unsigned int len)
2868 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2870 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2872 return mvdev->generation;
2875 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2876 unsigned int asid)
2878 bool change_map;
2879 int err;
2881 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
2882 if (err) {
2883 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2884 return err;
2887 if (change_map)
2888 err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
2890 return err;
2893 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2894 struct vhost_iotlb *iotlb)
2896 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2897 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2898 int err;
2900 down_write(&ndev->reslock);
2901 err = set_map_data(mvdev, iotlb, asid);
2902 up_write(&ndev->reslock);
2903 return err;
2906 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2908 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2910 if (is_ctrl_vq_idx(mvdev, idx))
2911 return &vdev->dev;
2913 return mvdev->vdev.dma_dev;
2916 static void free_irqs(struct mlx5_vdpa_net *ndev)
2918 struct mlx5_vdpa_irq_pool_entry *ent;
2919 int i;
2921 if (!msix_mode_supported(&ndev->mvdev))
2922 return;
2924 if (!ndev->irqp.entries)
2925 return;
2927 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
2928 ent = ndev->irqp.entries + i;
2929 if (ent->map.virq)
2930 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
2932 kfree(ndev->irqp.entries);
2935 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2937 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2938 struct mlx5_core_dev *pfmdev;
2939 struct mlx5_vdpa_net *ndev;
2941 ndev = to_mlx5_vdpa_ndev(mvdev);
2943 free_resources(ndev);
2944 mlx5_vdpa_destroy_mr(mvdev);
2945 if (!is_zero_ether_addr(ndev->config.mac)) {
2946 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2947 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2949 mlx5_vdpa_free_resources(&ndev->mvdev);
2950 free_irqs(ndev);
2951 kfree(ndev->event_cbs);
2952 kfree(ndev->vqs);
2955 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2957 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2958 struct vdpa_notification_area ret = {};
2959 struct mlx5_vdpa_net *ndev;
2960 phys_addr_t addr;
2962 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2963 return ret;
2965 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
2966 * notification to avoid the risk of mapping pages that contain the BAR
2967 * of more than one SF.
2968 */
2969 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2970 return ret;
2972 ndev = to_mlx5_vdpa_ndev(mvdev);
2973 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2974 ret.addr = addr;
2975 ret.size = PAGE_SIZE;
2977 return ret;
2979 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
2981 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2982 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2983 struct mlx5_vdpa_virtqueue *mvq;
2985 if (!is_index_valid(mvdev, idx))
2986 return -EINVAL;
2988 if (is_ctrl_vq_idx(mvdev, idx))
2989 return -EOPNOTSUPP;
2991 mvq = &ndev->vqs[idx];
2992 if (!mvq->map.virq)
2993 return -EOPNOTSUPP;
2995 return mvq->map.virq;
2998 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3000 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3002 return mvdev->actual_features;
3005 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3006 u64 *received_desc, u64 *completed_desc)
3008 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3009 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3010 void *cmd_hdr;
3011 void *ctx;
3012 int err;
3014 if (!counters_supported(&ndev->mvdev))
3015 return -EOPNOTSUPP;
3017 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3018 return -EAGAIN;
3020 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3022 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3023 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3024 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3025 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3027 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3028 if (err)
3029 return err;
3031 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3032 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3033 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3034 return 0;
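/* received_desc and completed_desc come from the VIRTIO_Q_COUNTERS general
 * object associated with the queue, read over the command interface; they
 * are only available while the queue object is in the RDY state, hence the
 * -EAGAIN above.
 */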
3037 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3038 struct sk_buff *msg,
3039 struct netlink_ext_ack *extack)
3041 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3042 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3043 struct mlx5_vdpa_virtqueue *mvq;
3044 struct mlx5_control_vq *cvq;
3045 u64 received_desc;
3046 u64 completed_desc;
3047 int err = 0;
3049 down_read(&ndev->reslock);
3050 if (!is_index_valid(mvdev, idx)) {
3051 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3056 if (idx == ctrl_vq_idx(mvdev)) {
3058 received_desc = cvq->received_desc;
3059 completed_desc = cvq->completed_desc;
3063 mvq = &ndev->vqs[idx];
3064 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3066 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3072 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3075 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3079 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3082 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3088 up_read(&ndev->reslock);
3092 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3094 struct mlx5_control_vq *cvq;
3096 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3097 return;
3099 cvq = &mvdev->cvq;
3100 cvq->ready = false;
3103 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3105 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3106 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3107 struct mlx5_vdpa_virtqueue *mvq;
3108 int i;
3110 mlx5_vdpa_info(mvdev, "suspending device\n");
3112 down_write(&ndev->reslock);
3113 unregister_link_notifier(ndev);
3114 for (i = 0; i < ndev->cur_num_vqs; i++) {
3115 mvq = &ndev->vqs[i];
3116 suspend_vq(ndev, mvq);
3118 mlx5_vdpa_cvq_suspend(mvdev);
3119 mvdev->suspended = true;
3120 up_write(&ndev->reslock);
3121 return 0;
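/* Suspend stops the data path but leaves the device configured: queue
 * objects stay allocated and their indices remain readable through
 * get_vq_state(), which is what allows the save/restore flow sketched near
 * mlx5_vdpa_get_vq_state() to work.
 */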
3124 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3125 unsigned int asid)
3127 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3129 if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3130 return -EINVAL;
3132 mvdev->group2asid[group] = asid;
3133 return 0;
3136 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3137 .set_vq_address = mlx5_vdpa_set_vq_address,
3138 .set_vq_num = mlx5_vdpa_set_vq_num,
3139 .kick_vq = mlx5_vdpa_kick_vq,
3140 .set_vq_cb = mlx5_vdpa_set_vq_cb,
3141 .set_vq_ready = mlx5_vdpa_set_vq_ready,
3142 .get_vq_ready = mlx5_vdpa_get_vq_ready,
3143 .set_vq_state = mlx5_vdpa_set_vq_state,
3144 .get_vq_state = mlx5_vdpa_get_vq_state,
3145 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3146 .get_vq_notification = mlx5_get_vq_notification,
3147 .get_vq_irq = mlx5_get_vq_irq,
3148 .get_vq_align = mlx5_vdpa_get_vq_align,
3149 .get_vq_group = mlx5_vdpa_get_vq_group,
3150 .get_device_features = mlx5_vdpa_get_device_features,
3151 .set_driver_features = mlx5_vdpa_set_driver_features,
3152 .get_driver_features = mlx5_vdpa_get_driver_features,
3153 .set_config_cb = mlx5_vdpa_set_config_cb,
3154 .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3155 .get_device_id = mlx5_vdpa_get_device_id,
3156 .get_vendor_id = mlx5_vdpa_get_vendor_id,
3157 .get_status = mlx5_vdpa_get_status,
3158 .set_status = mlx5_vdpa_set_status,
3159 .reset = mlx5_vdpa_reset,
3160 .get_config_size = mlx5_vdpa_get_config_size,
3161 .get_config = mlx5_vdpa_get_config,
3162 .set_config = mlx5_vdpa_set_config,
3163 .get_generation = mlx5_vdpa_get_generation,
3164 .set_map = mlx5_vdpa_set_map,
3165 .set_group_asid = mlx5_set_group_asid,
3166 .get_vq_dma_dev = mlx5_get_vq_dma_dev,
3167 .free = mlx5_vdpa_free,
3168 .suspend = mlx5_vdpa_suspend,
3171 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3173 u16 hw_mtu;
3174 int err;
3176 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3177 if (err)
3178 return err;
3180 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3181 return 0;
3184 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3186 struct mlx5_vdpa_net_resources *res = &ndev->res;
3187 int err;
3189 if (res->valid) {
3190 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3191 return -EEXIST;
3194 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3195 if (err)
3196 return err;
3198 err = create_tis(ndev);
3199 if (err)
3200 goto err_tis;
3202 res->valid = true;
3204 return 0;
3206 err_tis:
3207 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3208 return err;
3211 static void free_resources(struct mlx5_vdpa_net *ndev)
3213 struct mlx5_vdpa_net_resources *res = &ndev->res;
3215 if (!res->valid)
3216 return;
3218 destroy_tis(ndev);
3219 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3220 res->valid = false;
3223 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3225 struct mlx5_vdpa_virtqueue *mvq;
3228 for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3229 mvq = &ndev->vqs[i];
3230 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3231 mvq->index = i;
3232 mvq->ndev = ndev;
3233 mvq->fwqp.fw = true;
3234 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3236 for (; i < ndev->mvdev.max_vqs; i++) {
3237 mvq = &ndev->vqs[i];
3238 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3239 mvq->index = i;
3240 mvq->ndev = ndev;
3244 struct mlx5_vdpa_mgmtdev {
3245 struct vdpa_mgmt_dev mgtdev;
3246 struct mlx5_adev *madev;
3247 struct mlx5_vdpa_net *ndev;
3250 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3252 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3253 void *in;
3254 int err;
3256 in = kvzalloc(inlen, GFP_KERNEL);
3257 if (!in)
3258 return -ENOMEM;
3260 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3261 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3262 mtu + MLX5V_ETH_HARD_MTU);
3263 MLX5_SET(modify_nic_vport_context_in, in, opcode,
3264 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3266 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3268 kvfree(in);
3270 return err;
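/* The vport MTU is a wire MTU that includes the Ethernet hard header,
 * while the virtio_net config MTU does not, hence the symmetric
 * MLX5V_ETH_HARD_MTU adjustment here and in query_mtu(): provisioning a
 * virtio MTU of 1500 programs a vport MTU of 1500 + MLX5V_ETH_HARD_MTU.
 */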
3272 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3274 struct mlx5_vdpa_irq_pool_entry *ent;
3275 int i;
3277 if (!msix_mode_supported(&ndev->mvdev))
3278 return;
3280 if (!ndev->mvdev.mdev->pdev)
3281 return;
3283 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3284 if (!ndev->irqp.entries)
3285 return;
3288 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3289 ent = ndev->irqp.entries + i;
3290 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3291 dev_name(&ndev->mvdev.vdev.dev), i);
3292 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3293 if (!ent->map.virq)
3294 return;
3296 ndev->irqp.num_ent++;
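/* IRQ pre-allocation is best effort: irqp.num_ent only counts entries that
 * actually got a vector, and a VQ without a dedicated vector makes
 * mlx5_get_vq_irq() return -EOPNOTSUPP, in which case completions are
 * presumably delivered through the shared device EQs instead.
 */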
3300 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3301 const struct vdpa_dev_set_config *add_config)
3303 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3304 struct virtio_net_config *config;
3305 struct mlx5_core_dev *pfmdev;
3306 struct mlx5_vdpa_dev *mvdev;
3307 struct mlx5_vdpa_net *ndev;
3308 struct mlx5_core_dev *mdev;
3309 u64 device_features;
3310 u32 max_vqs;
3311 u16 mtu;
3312 int err;
3314 if (mgtdev->ndev)
3315 return -ENOSPC;
3317 mdev = mgtdev->madev->mdev;
3318 device_features = mgtdev->mgtdev.supported_features;
3319 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3320 if (add_config->device_features & ~device_features) {
3321 dev_warn(mdev->device,
3322 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3323 add_config->device_features, device_features);
3324 return -EINVAL;
3325 }
3326 device_features &= add_config->device_features;
3327 } else {
3328 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3330 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3331 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3332 dev_warn(mdev->device,
3333 "Must provision minimum features 0x%llx for this device",
3334 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3335 return -EOPNOTSUPP;
3338 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3339 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3340 dev_warn(mdev->device, "missing support for split virtqueues\n");
3344 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3345 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3346 if (max_vqs < 2) {
3347 dev_warn(mdev->device,
3348 "%d virtqueues are supported. At least 2 are required\n",
3349 max_vqs);
3350 return -EAGAIN;
3353 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3354 if (add_config->net.max_vq_pairs > max_vqs / 2)
3355 return -EINVAL;
3356 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3357 } else {
3358 max_vqs = 2;
3361 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3362 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3363 if (IS_ERR(ndev))
3364 return PTR_ERR(ndev);
3366 ndev->mvdev.max_vqs = max_vqs;
3368 mvdev = &ndev->mvdev;
3369 mvdev->mdev = mdev;
3370 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3371 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3372 if (!ndev->vqs || !ndev->event_cbs) {
3373 err = -ENOMEM;
3374 goto err_alloc;
3377 init_mvqs(ndev);
3378 allocate_irqs(ndev);
3379 init_rwsem(&ndev->reslock);
3380 config = &ndev->config;
3382 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3383 err = config_func_mtu(mdev, add_config->net.mtu);
3384 if (err)
3385 goto err_alloc;
3388 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3389 err = query_mtu(mdev, &mtu);
3390 if (err)
3391 goto err_alloc;
3393 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3396 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3397 if (get_link_state(mvdev))
3398 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3400 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3403 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3404 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3405 /* Don't bother setting the mac address in config if we are not going to provision _F_MAC */
3406 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3407 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3408 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3409 if (err)
3410 goto err_alloc;
3413 if (!is_zero_ether_addr(config->mac)) {
3414 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3415 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3416 if (err)
3417 goto err_alloc;
3418 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3419 /*
3420 * We used to clear the _F_MAC feature bit when a zero mac address
3421 * was seen and device features were not explicitly provisioned.
3422 * Keep that behaviour so old scripts do not break.
3423 */
3425 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3426 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3427 /* Don't provision zero mac address for _F_MAC */
3428 mlx5_vdpa_warn(&ndev->mvdev,
3429 "No mac address provisioned?\n");
3430 err = -EINVAL;
3431 goto err_alloc;
3434 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3435 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3437 ndev->mvdev.mlx_features = device_features;
3438 mvdev->vdev.dma_dev = &mdev->pdev->dev;
3439 err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3440 if (err)
3441 goto err_mpfs;
3443 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3444 err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3445 if (err)
3446 goto err_res;
3449 err = alloc_resources(ndev);
3450 if (err)
3451 goto err_mr;
3453 ndev->cvq_ent.mvdev = mvdev;
3454 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3455 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3461 mvdev->vdev.mdev = &mgtdev->mgtdev;
3462 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3463 if (err)
3464 goto err_reg;
3466 mgtdev->ndev = ndev;
3467 return 0;
3469 err_reg:
3470 destroy_workqueue(mvdev->wq);
3471 err_res2:
3472 free_resources(ndev);
3473 err_mr:
3474 mlx5_vdpa_destroy_mr(mvdev);
3475 err_res:
3476 mlx5_vdpa_free_resources(&ndev->mvdev);
3477 err_mpfs:
3478 if (!is_zero_ether_addr(config->mac))
3479 mlx5_mpfs_del_mac(pfmdev, config->mac);
3480 err_alloc:
3481 put_device(&mvdev->vdev.dev);
3482 return err;
3485 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3487 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3488 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3489 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3490 struct workqueue_struct *wq;
3492 mlx5_vdpa_remove_debugfs(ndev->debugfs);
3493 ndev->debugfs = NULL;
3494 unregister_link_notifier(ndev);
3495 _vdpa_unregister_device(dev);
3496 wq = mvdev->wq;
3497 mvdev->wq = NULL;
3498 destroy_workqueue(wq);
3499 mgtdev->ndev = NULL;
3502 static const struct vdpa_mgmtdev_ops mdev_ops = {
3503 .dev_add = mlx5_vdpa_dev_add,
3504 .dev_del = mlx5_vdpa_dev_del,
3507 static struct virtio_device_id id_table[] = {
3508 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3509 { 0 },
3512 static int mlx5v_probe(struct auxiliary_device *adev,
3513 const struct auxiliary_device_id *id)
3516 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3517 struct mlx5_core_dev *mdev = madev->mdev;
3518 struct mlx5_vdpa_mgmtdev *mgtdev;
3519 int err;
3521 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3522 if (!mgtdev)
3523 return -ENOMEM;
3525 mgtdev->mgtdev.ops = &mdev_ops;
3526 mgtdev->mgtdev.device = mdev->device;
3527 mgtdev->mgtdev.id_table = id_table;
3528 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3529 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3530 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3531 BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3532 mgtdev->mgtdev.max_supported_vqs =
3533 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3534 mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3535 mgtdev->madev = madev;
3537 err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3538 if (err)
3539 goto reg_err;
3541 auxiliary_set_drvdata(adev, mgtdev);
3543 return 0;
3545 reg_err:
3546 kfree(mgtdev);
3547 return err;
3550 static void mlx5v_remove(struct auxiliary_device *adev)
3552 struct mlx5_vdpa_mgmtdev *mgtdev;
3554 mgtdev = auxiliary_get_drvdata(adev);
3555 vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3556 kfree(mgtdev);
3559 static const struct auxiliary_device_id mlx5v_id_table[] = {
3560 { .name = MLX5_ADEV_NAME ".vnet", },
3564 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3566 static struct auxiliary_driver mlx5v_driver = {
3567 .name = "vnet",
3568 .probe = mlx5v_probe,
3569 .remove = mlx5v_remove,
3570 .id_table = mlx5v_id_table,
3573 module_auxiliary_driver(mlx5v_driver);