#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#include "cmd.h"
+#include <linux/mlx5/vport.h>
#define DRIVER_NAME "mlx5_ib"
-#define DRIVER_VERSION "2.2-1"
-#define DRIVER_RELDATE "Feb 2014"
+#define DRIVER_VERSION "5.0-0"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
- DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+ DRIVER_VERSION "\n";
enum {
MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
+static int get_port_state(struct ib_device *ibdev,
+ u8 port_num,
+ enum ib_port_state *state)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ if (!ret)
+ *state = attr.state;
+ return ret;
+}
+
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
write_unlock(&ibdev->roce.netdev_lock);
break;
+ case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
+ enum ib_port_state port_state;
+
+ if (get_port_state(&ibdev->ib_dev, 1, &port_state))
+ return NOTIFY_DONE;
+ if (ibdev->roce.last_port_state == port_state)
+ return NOTIFY_DONE;
+
+ ibdev->roce.last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
- ibev.event = (event == NETDEV_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ if (port_state == IB_PORT_DOWN)
+ ibev.event = IB_EVENT_PORT_ERR;
+ else if (port_state == IB_PORT_ACTIVE)
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ else
+ return NOTIFY_DONE;
+
ibev.element.port_num = 1;
ib_dispatch_event(&ibev);
}
return 0;
}
-static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
- struct ib_port_attr *props)
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
u32 eth_prot_oper;
+ int err;
/* Possible bad flows are checked before filling out props so in case
* of an error it will still be zeroed out.
*/
- if (mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, port_num))
- return;
+ err = mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, port_num);
+ if (err)
+ return err;
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width);
ndev = mlx5_ib_get_netdev(device, port_num);
if (!ndev)
- return;
+ return 0;
if (mlx5_lag_is_active(dev->mdev)) {
rcu_read_lock();
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
+ return 0;
}
-static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void *mlx5_addr)
+static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
{
-#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
- char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_l3_address);
- void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_mac_47_32);
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ u8 roce_version = 0;
+ u8 roce_l3_type = 0;
+ bool vlan = false;
+ u8 mac[ETH_ALEN];
+ u16 vlan_id = 0;
- if (!gid)
- return;
-
- ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+ if (gid) {
+ gid_type = attr->gid_type;
+ ether_addr_copy(mac, attr->ndev->dev_addr);
- if (is_vlan_dev(attr->ndev)) {
- MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
- MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ if (is_vlan_dev(attr->ndev)) {
+ vlan = true;
+ vlan_id = vlan_dev_vlan_id(attr->ndev);
+ }
}
- switch (attr->gid_type) {
+ switch (gid_type) {
case IB_GID_TYPE_IB:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ roce_version = MLX5_ROCE_VERSION_2;
+ if (ipv6_addr_v4mapped((void *)gid))
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
+ else
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
break;
default:
- WARN_ON(true);
+ mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
}
- if (attr->gid_type != IB_GID_TYPE_IB) {
- if (ipv6_addr_v4mapped((void *)gid))
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV4);
- else
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV6);
- }
-
- if ((attr->gid_type == IB_GID_TYPE_IB) ||
- !ipv6_addr_v4mapped((void *)gid))
- memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
- else
- memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
-}
-
-static int set_roce_addr(struct ib_device *device, u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr)
-{
- struct mlx5_ib_dev *dev = to_mdev(device);
- u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
- void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
-
- if (ll != IB_LINK_LAYER_ETHERNET)
- return -EINVAL;
-
- ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
-
- MLX5_SET(set_roce_address_in, in, roce_address_index, index);
- MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
- return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
+ roce_l3_type, gid->raw, mac, vlan,
+ vlan_id);
}
static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(device, port_num, index, gid, attr);
+ return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
}
static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
unsigned int index, __always_unused void **context)
{
- return set_roce_addr(device, port_num, index, NULL, NULL);
+ return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
}
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
u8 atomic_req_8B_endianness_mode =
- MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+ MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
/* Check if HW supports 8 bytes standard atomic operations and capable
* of host endianness respond
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
+ MLX5_CAP_GEN(dev->mdev, general_notification_event))
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
+
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
/* Legacy bit to support old userspace libraries */
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
+ unsigned int count;
+ int ret;
+
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
- return mlx5_query_mad_ifc_port(ibdev, port, props);
+ ret = mlx5_query_mad_ifc_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_HCA:
- return mlx5_query_hca_port(ibdev, port, props);
+ ret = mlx5_query_hca_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
- mlx5_query_port_roce(ibdev, port, props);
- return 0;
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
}
+
+ if (!ret && props) {
+ count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
+ props->gid_tbl_len -= count;
+ }
+ return ret;
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
return 0;
}
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+{
+ int err;
+
+ err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ if (err)
+ return err;
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return err;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td++;
+
+ if (dev->user_td == 2)
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+
+ mutex_unlock(&dev->lb_mutex);
+ return err;
+}
+
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+{
+ mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td--;
+
+ if (dev->user_td < 2)
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+
+ mutex_unlock(&dev->lb_mutex);
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_core_alloc_transport_domain(dev->mdev,
- &context->tdn);
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
goto out_page;
}
out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
- spec = mlx5_vzalloc(sizeof(*spec));
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !spec) {
err = -ENOMEM;
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
+static void delay_drop_handler(struct work_struct *work)
+{
+ int err;
+ struct mlx5_ib_delay_drop *delay_drop =
+ container_of(work, struct mlx5_ib_delay_drop,
+ delay_drop_work);
+
+ mutex_lock(&delay_drop->lock);
+ err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
+ delay_drop->timeout);
+ if (err) {
+ mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
+ delay_drop->timeout);
+ delay_drop->activate = false;
+ }
+ mutex_unlock(&delay_drop->lock);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
+ case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ goto out;
default:
- return;
+ goto out;
}
ibev.device = &ibdev->ib_dev;
if (port < 1 || port > ibdev->num_ports) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
- return;
+ goto out;
}
if (ibdev->ib_active)
if (fatal)
ibdev->ib_active = false;
+
+out:
+ return;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
__be32 val;
int ret, i;
- out = mlx5_vzalloc(outlen);
+ out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
int ret, i;
int offset = port->cnts.num_q_counters;
- out = mlx5_vzalloc(outlen);
+ out = kvzalloc(outlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
return num_counters;
}
+static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
+{
+ return mlx5_rdma_netdev_free(netdev);
+}
+
static struct net_device*
mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
u8 port_num,
unsigned char name_assign_type,
void (*setup)(struct net_device *))
{
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+
if (type != RDMA_NETDEV_IPOIB)
return ERR_PTR(-EOPNOTSUPP);
- return mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
- name, setup);
+ netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
+ name, setup);
+ if (likely(!IS_ERR_OR_NULL(netdev))) {
+ rn = netdev_priv(netdev);
+ rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
+ }
+ return netdev;
}
-static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
+static void cancel_delay_drop(struct mlx5_ib_dev *dev)
{
- return mlx5_rdma_netdev_free(netdev);
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+}
+
+static void init_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
- if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
- dev->ib_dev.free_rdma_netdev = mlx5_ib_free_rdma_netdev;
- }
+
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
+ dev->roce.last_port_state = IB_PORT_DOWN;
}
err = create_dev_resources(&dev->devr);
goto err_odp;
}
+ err = mlx5_ib_init_cong_debugfs(dev);
+ if (err)
+ goto err_cnt;
+
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
- goto err_cnt;
+ goto err_cong;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
if (err)
goto err_dev;
+ init_delay_drop(dev);
+
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
- goto err_umrc;
+ goto err_delay_drop;
}
+ if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ MLX5_CAP_GEN(mdev, disable_local_lb))
+ mutex_init(&dev->lb_mutex);
+
dev->ib_active = true;
return dev;
-err_umrc:
+err_delay_drop:
+ cancel_delay_drop(dev);
destroy_umrc_res(dev);
err_dev:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_cnt:
+ mlx5_ib_cleanup_cong_debugfs(dev);
+err_cong:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
+ cancel_delay_drop(dev);
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ mlx5_ib_cleanup_cong_debugfs(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);