2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <rdma/ib_verbs.h>
34 #include <linux/mlx5/fs.h>
/* Q_Key value programmed into the underlay QP context by mlx5i_init_underlay_qp(). */
38 #define IB_DEFAULT_Q_KEY 0xb1b
/* log2 of the default IPoIB RQ size (2^9 = 512 entries). */
39 #define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
/* Forward declarations: mlx5i_netdev_ops references these handlers before they are defined. */
41 static int mlx5i_open(struct net_device *netdev);
42 static int mlx5i_close(struct net_device *netdev);
43 static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu);
/*
 * net_device_ops table for the mlx5 IPoIB netdev. mlx5i_init() installs it
 * as netdev->netdev_ops.
 */
45 static const struct net_device_ops mlx5i_netdev_ops = {
46 .ndo_open = mlx5i_open,
47 .ndo_stop = mlx5i_close,
48 .ndo_init = mlx5i_dev_init,
49 .ndo_uninit = mlx5i_dev_cleanup,
50 .ndo_change_mtu = mlx5i_change_mtu,
51 .ndo_do_ioctl = mlx5i_ioctl,
54 /* IPoIB mlx5 netdev profile */
/*
 * Apply the IPoIB-specific overrides on top of already-built mlx5e channel
 * parameters: striding RQ off, IPoIB default RQ size, LRO off, and an
 * IPoIB-specific hard_mtu.
 */
55 static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
56 struct mlx5e_params *params)
58 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
59 MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false);
/* Re-derive the RQ type and its dependent parameters after clearing the flag. */
60 mlx5e_set_rq_type(mdev, params);
61 mlx5e_init_rq_type_params(mdev, params);
63 /* RQ size in ipoib by default is 512 */
/* A kdump kernel uses the minimum RQ size instead of the default. */
64 params->log_rq_mtu_frames = is_kdump_kernel() ?
65 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
66 MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
68 params->lro_en = false;
/* hard_mtu is the sum of MLX5_IB_GRH_BYTES and MLX5_IPOIB_HARD_LEN. */
69 params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
72 /* Called directly after IPoIB netdevice was created to initialize SW structs */
/*
 * Fills in the mlx5e private data of @netdev (netdev and profile
 * back-pointers, state lock), derives the MTU from the port, builds the
 * channel parameters, and installs the feature flags and ops tables.
 * NOTE(review): part of the parameter list and some local setup lines are
 * not visible in this view.
 */
73 void mlx5i_init(struct mlx5_core_dev *mdev,
74 struct net_device *netdev,
75 const struct mlx5e_profile *profile,
78 struct mlx5e_priv *priv = mlx5i_epriv(netdev);
83 priv->netdev = netdev;
84 priv->profile = profile;
86 mutex_init(&priv->state_lock);
/* The netdev MTU starts out as the maximum MTU reported for port 1. */
88 mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
89 netdev->mtu = max_mtu;
/* Generic mlx5e parameters first, then the IPoIB overrides on top. */
91 mlx5e_build_nic_params(mdev, &priv->channels.params,
92 profile->max_nch(mdev), netdev->mtu);
93 mlx5i_build_nic_params(mdev, &priv->channels.params);
95 mlx5e_timestamp_init(priv);
/* Offload features advertised as user-toggleable on this netdev. */
98 netdev->hw_features |= NETIF_F_SG;
99 netdev->hw_features |= NETIF_F_IP_CSUM;
100 netdev->hw_features |= NETIF_F_IPV6_CSUM;
101 netdev->hw_features |= NETIF_F_GRO;
102 netdev->hw_features |= NETIF_F_TSO;
103 netdev->hw_features |= NETIF_F_TSO6;
104 netdev->hw_features |= NETIF_F_RXCSUM;
105 netdev->hw_features |= NETIF_F_RXHASH;
107 netdev->netdev_ops = &mlx5i_netdev_ops;
108 netdev->ethtool_ops = &mlx5i_ethtool_ops;
111 /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
/*
 * Registered as the .cleanup callback of mlx5i_nic_profile.
 * NOTE(review): the function body is not visible in this view.
 */
112 static void mlx5i_cleanup(struct mlx5e_priv *priv)
/*
 * Bring the IPoIB underlay QP up by issuing the RST2INIT, INIT2RTR and
 * RTR2RTS modify commands in sequence.
 *
 * A zeroed QP context is allocated and filled with the path-migration state,
 * port 1, the configured P_Key index and the default Q_Key for the first
 * transition; it is cleared again before the remaining transitions.
 *
 * If any transition fails, the error is logged and the QP is moved to the
 * error state with a 2ERR command.
 *
 * NOTE(review): the return statements and the release of @context are on
 * lines not visible in this view.
 */
117 int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
119 struct mlx5_core_dev *mdev = priv->mdev;
120 struct mlx5i_priv *ipriv = priv->ppriv;
121 struct mlx5_core_qp *qp = &ipriv->qp;
122 struct mlx5_qp_context *context;
126 context = kzalloc(sizeof(*context), GFP_KERNEL);
130 context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
131 context->pri_path.port = 1;
132 context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
133 context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
135 ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
137 mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
138 goto err_qp_modify_to_err;
/* The later transitions are issued with an all-zero context. */
140 memset(context, 0, sizeof(*context));
142 ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
144 mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
145 goto err_qp_modify_to_err;
148 ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
150 mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
151 goto err_qp_modify_to_err;
157 err_qp_modify_to_err:
/*
 * context is already a pointer to the QP context buffer; pass it directly
 * rather than the address of the local pointer variable.
 */
158 mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, context, qp);
/*
 * Move the IPoIB underlay QP back to the reset state with a 2RST modify
 * command; a failure is only logged.
 */
163 void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
165 struct mlx5i_priv *ipriv = priv->ppriv;
166 struct mlx5_core_dev *mdev = priv->mdev;
/*
 * NOTE(review): context is an uninitialized stack object whose address is
 * handed to the command below - confirm the 2RST opcode never reads it.
 */
167 struct mlx5_qp_context context;
170 err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context,
173 mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err);
/* Value written to the QPC ulp_stateless_offload_mode field of the underlay QP. */
176 #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
/*
 * Create the IPoIB underlay QP: a UD QP in the migrated path-migration
 * state with enhanced ULP stateless offload mode, whose primary address
 * path uses port 1 with GRH enabled.
 *
 * The create_qp_in mailbox is allocated zeroed with kvzalloc(); a creation
 * failure is logged with its error code.
 * NOTE(review): the allocation check, mailbox release and return paths are
 * on lines not visible in this view.
 */
178 int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
186 inlen = MLX5_ST_SZ_BYTES(create_qp_in);
187 in = kvzalloc(inlen, GFP_KERNEL);
191 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
192 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
193 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
194 MLX5_SET(qpc, qpc, ulp_stateless_offload_mode,
195 MLX5_QP_ENHANCED_ULP_STATELESS_MODE);
/* Primary address path: port 1, GRH present. */
197 addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
198 MLX5_SET(ads, addr_path, vhca_port_num, 1);
199 MLX5_SET(ads, addr_path, grh, 1);
201 ret = mlx5_core_create_qp(mdev, qp, in, inlen);
203 mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret);
/* Destroy the underlay QP; thin wrapper around mlx5_core_destroy_qp(). */
212 void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
214 mlx5_core_destroy_qp(mdev, qp);
/*
 * Profile .init_tx callback: create the underlay QP, then a single TIS
 * (tc 0) bound to that QP's number, stored in priv->tisn[0].
 * If TIS creation fails, the underlay QP is destroyed again.
 */
217 static int mlx5i_init_tx(struct mlx5e_priv *priv)
219 struct mlx5i_priv *ipriv = priv->ppriv;
222 err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
224 mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err);
228 err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
230 mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
231 goto err_destroy_underlay_qp;
/* Error unwind: undo the QP creation. */
236 err_destroy_underlay_qp:
237 mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
/*
 * Profile .cleanup_tx callback: release what mlx5i_init_tx() created, in
 * reverse order (TIS first, then the underlay QP).
 */
241 static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
243 struct mlx5i_priv *ipriv = priv->ppriv;
245 mlx5e_destroy_tis(priv->mdev, priv->tisn[0]);
246 mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
/*
 * Set up RX flow steering for the IPoIB netdev: look up the kernel flow
 * namespace, create the aRFS tables, then the inner TTC table and the TTC
 * table, each pointed at the matching set of indirect TIRs.
 *
 * On a TTC creation failure the previously created tables are destroyed in
 * reverse order via the error labels.
 */
249 static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
251 struct ttc_params ttc_params = {};
254 priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
255 MLX5_FLOW_NAMESPACE_KERNEL);
260 err = mlx5e_arfs_create_tables(priv);
/*
 * An aRFS failure is logged and NETIF_F_NTUPLE is removed from hw_features.
 * NOTE(review): no goto is visible here, so this appears non-fatal - confirm.
 */
262 netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
264 priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
/* Inner TTC table: steer to the inner indirect TIRs. */
267 mlx5e_set_ttc_basic_params(priv, &ttc_params);
268 mlx5e_set_inner_ttc_ft_params(&ttc_params);
269 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
270 ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
272 err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
274 netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
276 goto err_destroy_arfs_tables;
/* TTC table: same parameter struct, now pointed at the indirect TIRs. */
279 mlx5e_set_ttc_ft_params(&ttc_params);
280 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
281 ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
283 err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
285 netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
287 goto err_destroy_inner_ttc_table;
292 err_destroy_inner_ttc_table:
293 mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
294 err_destroy_arfs_tables:
295 mlx5e_arfs_destroy_tables(priv);
/*
 * Tear down the tables built by mlx5i_create_flow_steering(), in reverse
 * creation order: TTC, inner TTC, then aRFS.
 */
300 static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
302 mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
303 mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
304 mlx5e_arfs_destroy_tables(priv);
/*
 * Profile .init_rx callback: create the RX objects in order - indirect RQT,
 * direct RQTs, indirect TIRs, direct TIRs, then flow steering.
 * A failure at any step jumps to the label that unwinds everything created
 * before it, in reverse order.
 */
307 static int mlx5i_init_rx(struct mlx5e_priv *priv)
311 err = mlx5e_create_indirect_rqt(priv);
315 err = mlx5e_create_direct_rqts(priv);
317 goto err_destroy_indirect_rqts;
319 err = mlx5e_create_indirect_tirs(priv);
321 goto err_destroy_direct_rqts;
323 err = mlx5e_create_direct_tirs(priv);
325 goto err_destroy_indirect_tirs;
327 err = mlx5i_create_flow_steering(priv);
329 goto err_destroy_direct_tirs;
/* Error unwind, newest object first. */
333 err_destroy_direct_tirs:
334 mlx5e_destroy_direct_tirs(priv);
335 err_destroy_indirect_tirs:
336 mlx5e_destroy_indirect_tirs(priv);
337 err_destroy_direct_rqts:
338 mlx5e_destroy_direct_rqts(priv);
339 err_destroy_indirect_rqts:
340 mlx5e_destroy_rqt(priv, &priv->indir_rqt);
/*
 * Profile .cleanup_rx callback: destroy the RX objects in the reverse of
 * the order mlx5i_init_rx() creates them.
 */
344 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
346 mlx5i_destroy_flow_steering(priv);
347 mlx5e_destroy_direct_tirs(priv);
348 mlx5e_destroy_indirect_tirs(priv);
349 mlx5e_destroy_direct_rqts(priv);
350 mlx5e_destroy_rqt(priv, &priv->indir_rqt);
/*
 * mlx5e profile used for the parent IPoIB netdev (mlx5_rdma_netdev_alloc()
 * selects it when the device is not a sub-interface). Callbacks set to NULL
 * are not provided for IPoIB.
 * NOTE(review): one initializer line just below the opening line is not
 * visible in this view.
 */
353 static const struct mlx5e_profile mlx5i_nic_profile = {
355 .cleanup = mlx5i_cleanup,
356 .init_tx = mlx5i_init_tx,
357 .cleanup_tx = mlx5i_cleanup_tx,
358 .init_rx = mlx5i_init_rx,
359 .cleanup_rx = mlx5i_cleanup_rx,
360 .enable = NULL, /* mlx5i_enable */
361 .disable = NULL, /* mlx5i_disable */
362 .update_stats = NULL, /* mlx5i_update_stats */
363 .max_nch = mlx5e_get_max_num_channels,
364 .update_carrier = NULL, /* no HW update in IB link */
365 .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
366 .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
367 .max_tc = MLX5I_MAX_NUM_TC,
370 /* mlx5i netdev NDos */
/*
 * ndo_change_mtu handler. Runs under priv->state_lock.
 *
 * If the device is not opened, the new MTU is simply recorded in the
 * channel parameters and in netdev->mtu. Otherwise a new set of channels is
 * opened with the new sw_mtu and the priv is switched over to it before
 * netdev->mtu is updated.
 */
372 static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
374 struct mlx5e_priv *priv = mlx5i_epriv(netdev);
375 struct mlx5e_channels new_channels = {};
376 struct mlx5e_params *params;
379 mutex_lock(&priv->state_lock);
381 params = &priv->channels.params;
/* Device closed: nothing to reconfigure, just store the value. */
383 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
384 params->sw_mtu = new_mtu;
385 netdev->mtu = params->sw_mtu;
/* Device opened: build new channels from a copy of the current params. */
389 new_channels.params = *params;
390 new_channels.params.sw_mtu = new_mtu;
391 err = mlx5e_open_channels(priv, &new_channels);
395 mlx5e_switch_priv_channels(priv, &new_channels, NULL);
396 netdev->mtu = new_channels.params.sw_mtu;
399 mutex_unlock(&priv->state_lock);
/*
 * ndo_init handler: encode the underlay QP number into the device address
 * and register the QPN-to-netdev mapping.
 */
403 int mlx5i_dev_init(struct net_device *dev)
405 struct mlx5e_priv *priv = mlx5i_epriv(dev);
406 struct mlx5i_priv *ipriv = priv->ppriv;
408 /* Set dev address using underlay QP */
/* Bytes 1..3 hold the low 24 bits of the QPN, most significant byte first. */
409 dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff;
410 dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff;
411 dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
413 /* Add QPN to net-device mapping to HT */
414 mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn);
/*
 * ndo_do_ioctl handler: forwards hardware-timestamping requests to
 * mlx5e_hwstamp_set() / mlx5e_hwstamp_get().
 * NOTE(review): the dispatch on @cmd and the fallback return are on lines
 * not visible in this view.
 */
419 int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
421 struct mlx5e_priv *priv = mlx5i_epriv(dev);
425 return mlx5e_hwstamp_set(priv, ifr);
427 return mlx5e_hwstamp_get(priv, ifr);
/*
 * ndo_uninit handler: reset the underlay QP and remove the QPN-to-netdev
 * mapping that mlx5i_dev_init() added.
 */
433 void mlx5i_dev_cleanup(struct net_device *dev)
435 struct mlx5e_priv *priv = mlx5i_epriv(dev);
436 struct mlx5i_priv *ipriv = priv->ppriv;
438 mlx5i_uninit_underlay_qp(priv);
440 /* Delete QPN to net-device mapping from HT */
441 mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn);
/*
 * ndo_open handler. Under epriv->state_lock: mark the device OPENED, bring
 * the underlay QP up, attach its QPN to RX flow steering, open the
 * channels, then refresh the TIRs and activate the channels.
 *
 * On failure the steps already done are undone in reverse order and the
 * OPENED bit is cleared before the lock is released.
 */
444 static int mlx5i_open(struct net_device *netdev)
446 struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
447 struct mlx5i_priv *ipriv = epriv->ppriv;
448 struct mlx5_core_dev *mdev = epriv->mdev;
451 mutex_lock(&epriv->state_lock);
453 set_bit(MLX5E_STATE_OPENED, &epriv->state);
455 err = mlx5i_init_underlay_qp(epriv);
457 mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err);
458 goto err_clear_state_opened_flag;
461 err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
463 mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
467 err = mlx5e_open_channels(epriv, &epriv->channels);
469 goto err_remove_fs_underlay_qp;
471 mlx5e_refresh_tirs(epriv, false);
472 mlx5e_activate_priv_channels(epriv);
474 mutex_unlock(&epriv->state_lock);
/* Error unwind: detach the QPN from steering, reset the QP, clear OPENED. */
477 err_remove_fs_underlay_qp:
478 mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
480 mlx5i_uninit_underlay_qp(epriv);
481 err_clear_state_opened_flag:
482 clear_bit(MLX5E_STATE_OPENED, &epriv->state);
483 mutex_unlock(&epriv->state_lock);
/*
 * ndo_stop handler. Under epriv->state_lock: if the device is still marked
 * OPENED, clear the flag, drop the carrier, detach the underlay QPN from RX
 * flow steering, deactivate and close the channels, and finally reset the
 * underlay QP.
 */
487 static int mlx5i_close(struct net_device *netdev)
489 struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
490 struct mlx5i_priv *ipriv = epriv->ppriv;
491 struct mlx5_core_dev *mdev = epriv->mdev;
493 /* May already be CLOSED in case a previous configuration operation
494 * (e.g RX/TX queue size change) that involves close&open failed.
 */
496 mutex_lock(&epriv->state_lock);
498 if (!test_bit(MLX5E_STATE_OPENED, &epriv->state))
501 clear_bit(MLX5E_STATE_OPENED, &epriv->state);
503 netif_carrier_off(epriv->netdev);
504 mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
/*
 * Close the channels before resetting the underlay QP: work posted while
 * the channels are being torn down needs the QP to still be in RTS.
 */
506 mlx5e_deactivate_priv_channels(epriv);
507 mlx5e_close_channels(&epriv->channels);
505 mlx5i_uninit_underlay_qp(epriv);
509 mutex_unlock(&epriv->state_lock);
513 /* IPoIB RDMA netdev callbacks */
/*
 * rdma_netdev attach_mcast callback: attach the underlay QP to the
 * multicast group identified by @gid. A failure is logged as a warning.
 * NOTE(review): the trailing parameter(s), the handling of @set_qkey and
 * the return are on lines not visible in this view.
 */
514 static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
515 union ib_gid *gid, u16 lid, int set_qkey,
518 struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
519 struct mlx5_core_dev *mdev = epriv->mdev;
520 struct mlx5i_priv *ipriv = epriv->ppriv;
523 mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
524 err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn);
526 mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
527 ipriv->qp.qpn, gid->raw);
530 mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
/*
 * rdma_netdev detach_mcast callback: detach the underlay QP from the
 * multicast group identified by @gid. A failure is logged at debug level.
 */
538 static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
539 union ib_gid *gid, u16 lid)
541 struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
542 struct mlx5_core_dev *mdev = epriv->mdev;
543 struct mlx5i_priv *ipriv = epriv->ppriv;
546 mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
548 err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
550 mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
551 ipriv->qp.qpn, gid->raw);
/*
 * rdma_netdev send callback: transmit @skb to destination QP @dqpn using
 * the address vector of @address.
 * The send queue is selected by the skb's queue mapping, and the stored
 * ipriv->qkey is passed along to mlx5i_sq_xmit().
 */
556 static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
557 struct ib_ah *address, u32 dqpn)
559 struct mlx5e_priv *epriv = mlx5i_epriv(dev);
560 struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)];
561 struct mlx5_ib_ah *mah = to_mah(address);
562 struct mlx5i_priv *ipriv = epriv->ppriv;
564 return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
/*
 * rdma_netdev set_id callback: store @id, truncated to u16, as the P_Key
 * index that mlx5i_init_underlay_qp() programs into the underlay QP.
 */
567 static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
569 struct mlx5i_priv *ipriv = netdev_priv(netdev);
571 ipriv->pkey_index = (u16)id;
/*
 * Check that the device can run accelerated IPoIB: the port type must be IB
 * and the ipoib_enhanced_offloads capability must be set.
 * mlx5_rdma_netdev_alloc() treats a non-zero result as "not supported".
 * NOTE(review): the exact return values are on lines not visible in this view.
 */
574 static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
576 if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
579 if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
580 mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
/*
 * netdev priv_destructor installed by mlx5_rdma_netdev_alloc(): detach the
 * netdev, run the profile cleanup callback and destroy the workqueue.
 * For the parent (non sub-interface) device it also releases the QPN hash
 * table and the mdev resources.
 */
587 static void mlx5_rdma_netdev_free(struct net_device *netdev)
589 struct mlx5e_priv *priv = mlx5i_epriv(netdev);
590 struct mlx5i_priv *ipriv = priv->ppriv;
591 const struct mlx5e_profile *profile = priv->profile;
593 mlx5e_detach_netdev(priv);
594 profile->cleanup(priv);
595 destroy_workqueue(priv->wq);
/* Resources owned only by the parent interface. */
597 if (!ipriv->sub_interface) {
598 mlx5i_pkey_qpn_ht_cleanup(netdev);
599 mlx5e_destroy_mdev_resources(priv->mdev);
/*
 * Allocate and initialize an accelerated IPoIB netdev on top of @mdev.
 *
 * Returns ERR_PTR(-EOPNOTSUPP) when the device lacks the required
 * capabilities. Otherwise it picks the pkey (sub-interface) or NIC profile,
 * allocates a multi-queue netdev with room for both mlx5i_priv and
 * mlx5e_priv, creates the "mlx5i" workqueue, sets up the parent-only
 * resources, runs the profile init, attaches the netdev, and fills in the
 * rdma_netdev callbacks.
 *
 * NOTE(review): several parameters, local declarations, error checks,
 * labels and return statements are on lines not visible in this view.
 */
603 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
604 struct ib_device *ibdev,
606 void (*setup)(struct net_device *))
608 const struct mlx5e_profile *profile;
609 struct net_device *netdev;
610 struct mlx5i_priv *ipriv;
611 struct mlx5e_priv *epriv;
612 struct rdma_netdev *rn;
617 if (mlx5i_check_required_hca_cap(mdev)) {
618 mlx5_core_warn(mdev, "Accelerated mode is not supported\n");
619 return ERR_PTR(-EOPNOTSUPP);
622 /* TODO: Need to find a better way to check if child device */
/* A non-zero pdn in the mdev resources is taken to mean a sub-interface. */
623 sub_interface = (mdev->mlx5e_res.pdn != 0);
626 profile = mlx5i_pkey_get_profile();
628 profile = &mlx5i_nic_profile;
630 nch = profile->max_nch(mdev);
/* Private area holds mlx5i_priv and mlx5e_priv together. */
632 netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
633 name, NET_NAME_UNKNOWN,
635 nch * MLX5E_MAX_NUM_TC,
638 mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
642 ipriv = netdev_priv(netdev);
643 epriv = mlx5i_epriv(netdev);
645 epriv->wq = create_singlethread_workqueue("mlx5i");
647 goto err_free_netdev;
649 ipriv->sub_interface = sub_interface;
/* Parent interface only: QPN hash table and mdev resources. */
650 if (!ipriv->sub_interface) {
651 err = mlx5i_pkey_qpn_ht_init(netdev);
653 mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
657 /* This should only be called once per mdev */
658 err = mlx5e_create_mdev_resources(mdev);
663 profile->init(mdev, netdev, profile, ipriv);
665 mlx5e_attach_netdev(epriv);
666 netif_carrier_off(netdev);
668 /* set rdma_netdev func pointers */
671 rn->send = mlx5i_xmit;
672 rn->attach_mcast = mlx5i_attach_mcast;
673 rn->detach_mcast = mlx5i_detach_mcast;
674 rn->set_id = mlx5i_set_pkey_index;
/* Teardown goes through mlx5_rdma_netdev_free(); the core frees the netdev afterwards. */
676 netdev->priv_destructor = mlx5_rdma_netdev_free;
677 netdev->needs_free_netdev = 1;
/* Error unwind (labels not visible in this view). */
682 mlx5i_pkey_qpn_ht_cleanup(netdev);
684 destroy_workqueue(epriv->wq);
690 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);