IB/mlx4: Do IBoE GID table resets per-port
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1958c5c..8e10630 100644
@@ -39,6 +39,8 @@
 #include <linux/inetdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
 
 #include <rdma/ib_smi.h>
 #include <rdma/ib_user_verbs.h>
@@ -55,6 +57,7 @@
 #define DRV_RELDATE    "April 4, 2008"
 
 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -92,21 +95,27 @@ static union ib_gid zgid;
 
 static int check_flow_steering_support(struct mlx4_dev *dev)
 {
+       int eth_num_ports = 0;
        int ib_num_ports = 0;
-       int i;
-
-       mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
-               ib_num_ports++;
 
-       if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
-               if (ib_num_ports || mlx4_is_mfunc(dev)) {
-                       pr_warn("Device managed flow steering is unavailable "
-                               "for IB ports or in multifunction env.\n");
-                       return 0;
+       int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+       if (dmfs) {
+               int i;
+               mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+                       eth_num_ports++;
+               mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+                       ib_num_ports++;
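+               /* DMFS stays usable only if every port type present has the
+                * matching capability: DMFS_IPOIB for IB ports, FS_EN for
+                * Ethernet ports. */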
+               dmfs &= (!ib_num_ports ||
+                        (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
+                       (!eth_num_ports ||
+                        (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
+               if (ib_num_ports && mlx4_is_mfunc(dev)) {
+                       pr_warn("Device managed flow steering is unavailable for IB ports in multifunction env.\n");
+                       dmfs = 0;
                }
-               return 1;
        }
-       return 0;
+       return dmfs;
 }
 
 static int mlx4_ib_query_device(struct ib_device *ibdev,
@@ -165,7 +174,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                        props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
                else
                        props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
-       if (check_flow_steering_support(dev->dev))
+       if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
                props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
        }
 
@@ -787,7 +796,6 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
                   union ib_gid *gid)
 {
-       u8 mac[6];
        struct net_device *ndev;
        int ret = 0;
 
@@ -801,11 +809,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
        spin_unlock(&mdev->iboe.lock);
 
        if (ndev) {
-               rdma_get_mcast_mac((struct in6_addr *)gid, mac);
-               rtnl_lock();
-               dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
                ret = 1;
-               rtnl_unlock();
                dev_put(ndev);
        }
 
@@ -819,6 +823,7 @@ struct mlx4_ib_steering {
 };
 
 static int parse_flow_attr(struct mlx4_dev *dev,
+                          u32 qp_num,
                           union ib_flow_spec *ib_spec,
                           struct _rule_hw *mlx4_spec)
 {
@@ -834,6 +839,14 @@ static int parse_flow_attr(struct mlx4_dev *dev,
                mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
                mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
                break;
+       case IB_FLOW_SPEC_IB:
+               type = MLX4_NET_TRANS_RULE_ID_IB;
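+               /* Steer on the destination QP number; only the low 24 bits
+                * are significant, hence the 0xffffff mask. */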
+               mlx4_spec->ib.l3_qpn =
+                       cpu_to_be32(qp_num);
+               mlx4_spec->ib.qpn_mask =
+                       cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
+               break;
+
 
        case IB_FLOW_SPEC_IPV4:
                type = MLX4_NET_TRANS_RULE_ID_IPV4;
@@ -865,6 +878,115 @@ static int parse_flow_attr(struct mlx4_dev *dev,
        return mlx4_hw_rule_sz(dev, type);
 }
 
+struct default_rules {
+       __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+       __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+       __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
+       __u8  link_layer;
+};
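+/*
+ * Implicit rules added in front of a user flow.  The single entry below
+ * says: on a port whose link layer is InfiniBand, a flow that carries an
+ * IPv4 spec but no ETH spec gets an IB L2 spec prepended.
+ */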
+static const struct default_rules default_table[] = {
+       {
+               .mandatory_fields = {IB_FLOW_SPEC_IPV4},
+               .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
+               .rules_create_list = {IB_FLOW_SPEC_IB},
+               .link_layer = IB_LINK_LAYER_INFINIBAND
+       }
+};
+
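+/* Return the index of the default_table entry whose mandatory/forbidden
+ * specs and link layer match this flow, or -1 if none applies. */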
+static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
+                                        struct ib_flow_attr *flow_attr)
+{
+       int i, j, k;
+       void *ib_flow;
+       const struct default_rules *pdefault_rules = default_table;
+       u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
+
+       for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
+            pdefault_rules++) {
+               __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
+               memset(&field_types, 0, sizeof(field_types));
+
+               if (link_layer != pdefault_rules->link_layer)
+                       continue;
+
+               ib_flow = flow_attr + 1;
+               /* we assume the specs are sorted */
+               for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
+                    j < flow_attr->num_of_specs; k++) {
+                       union ib_flow_spec *current_flow =
+                               (union ib_flow_spec *)ib_flow;
+
+                       /* same layer but different type */
+                       if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
+                            (pdefault_rules->mandatory_fields[k] &
+                             IB_FLOW_SPEC_LAYER_MASK)) &&
+                           (current_flow->type !=
+                            pdefault_rules->mandatory_fields[k]))
+                               goto out;
+
+                       /* same layer, try match next one */
+                       if (current_flow->type ==
+                           pdefault_rules->mandatory_fields[k]) {
+                               j++;
+                               ib_flow +=
+                                       ((union ib_flow_spec *)ib_flow)->size;
+                       }
+               }
+
+               ib_flow = flow_attr + 1;
+               for (j = 0; j < flow_attr->num_of_specs;
+                    j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
+                       for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
+                               /* same layer and same type */
+                               if (((union ib_flow_spec *)ib_flow)->type ==
+                                   pdefault_rules->mandatory_not_fields[k])
+                                       goto out;
+
+               return i;
+       }
+out:
+       return -1;
+}
+
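+/* Emit the hardware specs listed in rules_create_list into mlx4_spec and
+ * return the total size written, or -EINVAL on an unknown rule type. */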
+static int __mlx4_ib_create_default_rules(
+               struct mlx4_ib_dev *mdev,
+               struct ib_qp *qp,
+               const struct default_rules *pdefault_rules,
+               struct _rule_hw *mlx4_spec) {
+       int size = 0;
+       int i;
+
+       for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
+                       sizeof(pdefault_rules->rules_create_list[0]); i++) {
+               int ret;
+               union ib_flow_spec ib_spec;
+               switch (pdefault_rules->rules_create_list[i]) {
+               case 0:
+                       /* no rule */
+                       continue;
+               case IB_FLOW_SPEC_IB:
+                       ib_spec.type = IB_FLOW_SPEC_IB;
+                       ib_spec.size = sizeof(struct ib_flow_spec_ib);
+
+                       break;
+               default:
+                       /* invalid rule */
+                       return -EINVAL;
+               }
+               /* We must add an empty rule here; the QPN is ignored */
+               ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
+                                     mlx4_spec);
+               if (ret < 0) {
+                       pr_info("invalid parsing\n");
+                       return -EINVAL;
+               }
+
+               mlx4_spec = (void *)mlx4_spec + ret;
+               size += ret;
+       }
+       return size;
+}
+
 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
                          int domain,
                          enum mlx4_net_trans_promisc_mode flow_type,
@@ -876,6 +998,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
        struct mlx4_ib_dev *mdev = to_mdev(qp->device);
        struct mlx4_cmd_mailbox *mailbox;
        struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+       int default_flow;
 
        static const u16 __mlx4_domain[] = {
                [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
@@ -910,8 +1033,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
 
        ib_flow = flow_attr + 1;
        size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+       /* Add default flows */
+       default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
+       if (default_flow >= 0) {
+               ret = __mlx4_ib_create_default_rules(
+                               mdev, qp, default_table + default_flow,
+                               mailbox->buf + size);
+               if (ret < 0) {
+                       mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+                       return -EINVAL;
+               }
+               size += ret;
+       }
        for (i = 0; i < flow_attr->num_of_specs; i++) {
-               ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+               ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
+                                     mailbox->buf + size);
                if (ret < 0) {
                        mlx4_free_cmd_mailbox(mdev->dev, mailbox);
                        return -EINVAL;
@@ -1025,6 +1161,8 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        struct mlx4_ib_qp *mqp = to_mqp(ibqp);
        u64 reg_id;
        struct mlx4_ib_steering *ib_steering = NULL;
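+       /* An IBoE multicast GID built from an IPv4 address carries 0x0e in
+        * byte 1, so such groups are attached with the IPv4 protocol. */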
+       enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+               MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
 
        if (mdev->dev->caps.steering_mode ==
            MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1036,7 +1174,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
                                    !!(mqp->flags &
                                       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
-                                   MLX4_PROT_IB_IPV6, &reg_id);
+                                   prot, &reg_id);
        if (err)
                goto err_malloc;
 
@@ -1055,7 +1193,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 err_add:
        mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
-                             MLX4_PROT_IB_IPV6, reg_id);
+                             prot, reg_id);
 err_malloc:
        kfree(ib_steering);
 
@@ -1083,10 +1221,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        int err;
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *mqp = to_mqp(ibqp);
-       u8 mac[6];
        struct net_device *ndev;
        struct mlx4_ib_gid_entry *ge;
        u64 reg_id = 0;
+       enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+               MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
 
        if (mdev->dev->caps.steering_mode ==
            MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1109,7 +1248,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        }
 
        err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
-                                   MLX4_PROT_IB_IPV6, reg_id);
+                                   prot, reg_id);
        if (err)
                return err;
 
@@ -1121,13 +1260,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                if (ndev)
                        dev_hold(ndev);
                spin_unlock(&mdev->iboe.lock);
-               rdma_get_mcast_mac((struct in6_addr *)gid, mac);
-               if (ndev) {
-                       rtnl_lock();
-                       dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
-                       rtnl_unlock();
+               if (ndev)
                        dev_put(ndev);
-               }
                list_del(&ge->list);
                kfree(ge);
        } else
@@ -1223,7 +1357,8 @@ static struct device_attribute *mlx4_class_attributes[] = {
        &dev_attr_board_id
 };
 
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
+static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
+                                    struct net_device *dev)
 {
        memcpy(eui, dev->dev_addr, 3);
        memcpy(eui + 5, dev->dev_addr + 3, 3);
@@ -1259,161 +1394,346 @@ static void update_gids_task(struct work_struct *work)
                       MLX4_CMD_WRAPPED);
        if (err)
                pr_warn("set port command failed\n");
-       else {
-               memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
+       else
                mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       kfree(gw);
+}
+
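+/* Write the (zeroed) shadow GID table of one port to the firmware.  The
+ * SET_PORT GID table command is issued only for ports whose link layer is
+ * Ethernet (IBoE). */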
+static void reset_gids_task(struct work_struct *work)
+{
+       struct update_gid_work *gw =
+                       container_of(work, struct update_gid_work, work);
+       struct mlx4_cmd_mailbox *mailbox;
+       union ib_gid *gids;
+       int err;
+       struct mlx4_dev *dev = gw->dev->dev;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox)) {
+               pr_warn("reset gid table failed\n");
+               goto free;
+       }
+
+       gids = mailbox->buf;
+       memcpy(gids, gw->gids, sizeof(gw->gids));
+
+       if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
+                                   IB_LINK_LAYER_ETHERNET) {
+               err = mlx4_cmd(dev, mailbox->dma,
+                              MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+                              1, MLX4_CMD_SET_PORT,
+                              MLX4_CMD_TIME_CLASS_B,
+                              MLX4_CMD_WRAPPED);
+               if (err)
+                       pr_warn("set port %d command failed\n", gw->port);
        }
 
        mlx4_free_cmd_mailbox(dev, mailbox);
+free:
        kfree(gw);
 }
 
-static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
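+/* Add or remove one GID in the port's shadow GID table.  Slot 0 is reserved
+ * for the default GID; other slots are allocated on demand and emptied when
+ * 'clear' is set.  A work item is queued only if the table changed. */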
+static int update_gid_table(struct mlx4_ib_dev *dev, int port,
+                           union ib_gid *gid, int clear,
+                           int default_gid)
 {
-       struct net_device *ndev = dev->iboe.netdevs[port - 1];
        struct update_gid_work *work;
-       struct net_device *tmp;
        int i;
-       u8 *hits;
-       int ret;
-       union ib_gid gid;
-       int free;
-       int found;
        int need_update = 0;
-       u16 vid;
-
-       work = kzalloc(sizeof *work, GFP_ATOMIC);
-       if (!work)
-               return -ENOMEM;
-
-       hits = kzalloc(128, GFP_ATOMIC);
-       if (!hits) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       int free = -1;
+       int found = -1;
+       int max_gids;
 
-       rcu_read_lock();
-       for_each_netdev_rcu(&init_net, tmp) {
-               if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
-                       gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
-                       vid = rdma_vlan_dev_vlan_id(tmp);
-                       mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
-                       found = 0;
-                       free = -1;
-                       for (i = 0; i < 128; ++i) {
-                               if (free < 0 &&
-                                   !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
-                                       free = i;
-                               if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
-                                       hits[i] = 1;
-                                       found = 1;
+       if (default_gid) {
+               free = 0;
+       } else {
+               max_gids = dev->dev->caps.gid_table_len[port];
+               for (i = 1; i < max_gids; ++i) {
+                       if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
+                                   sizeof(*gid)))
+                               found = i;
+
+                       if (clear) {
+                               if (found >= 0) {
+                                       need_update = 1;
+                                       dev->iboe.gid_table[port - 1][found] =
+                                               zgid;
                                        break;
                                }
-                       }
+                       } else {
+                               if (found >= 0)
+                                       break;
 
-                       if (!found) {
-                               if (tmp == ndev &&
-                                   (memcmp(&dev->iboe.gid_table[port - 1][0],
-                                           &gid, sizeof gid) ||
-                                    !memcmp(&dev->iboe.gid_table[port - 1][0],
-                                            &zgid, sizeof gid))) {
-                                       dev->iboe.gid_table[port - 1][0] = gid;
-                                       ++need_update;
-                                       hits[0] = 1;
-                               } else if (free >= 0) {
-                                       dev->iboe.gid_table[port - 1][free] = gid;
-                                       hits[free] = 1;
-                                       ++need_update;
-                               }
+                               if (free < 0 &&
+                                   !memcmp(&dev->iboe.gid_table[port - 1][i],
+                                           &zgid, sizeof(*gid)))
+                                       free = i;
                        }
                }
        }
-       rcu_read_unlock();
 
-       for (i = 0; i < 128; ++i)
-               if (!hits[i]) {
-                       if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
-                               ++need_update;
-                       dev->iboe.gid_table[port - 1][i] = zgid;
-               }
+       if (found == -1 && !clear && free >= 0) {
+               dev->iboe.gid_table[port - 1][free] = *gid;
+               need_update = 1;
+       }
 
-       if (need_update) {
-               memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
-               INIT_WORK(&work->work, update_gids_task);
-               work->port = port;
-               work->dev = dev;
-               queue_work(wq, &work->work);
-       } else
-               kfree(work);
+       if (!need_update)
+               return 0;
+
+       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work)
+               return -ENOMEM;
+
+       memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
+       INIT_WORK(&work->work, update_gids_task);
+       work->port = port;
+       work->dev = dev;
+       queue_work(wq, &work->work);
 
-       kfree(hits);
        return 0;
+}
 
-out:
-       kfree(work);
-       return ret;
+static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid)
+{
+       gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+       mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
 }
 
-static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
+
+static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
 {
-       switch (event) {
-       case NETDEV_UP:
-       case NETDEV_CHANGEADDR:
-               update_ipv6_gids(dev, port, 0);
-               break;
+       struct update_gid_work *work;
 
-       case NETDEV_DOWN:
-               update_ipv6_gids(dev, port, 1);
-               dev->iboe.netdevs[port - 1] = NULL;
-       }
+
+       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work)
+               return -ENOMEM;
+
+       memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
+       memset(work->gids, 0, sizeof(work->gids));
+       INIT_WORK(&work->work, reset_gids_task);
+       work->dev = dev;
+       work->port = port;
+       queue_work(wq, &work->work);
+       return 0;
 }
 
-static void netdev_added(struct mlx4_ib_dev *dev, int port)
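+/* Common backend for the inet/inet6 address notifiers: translate an address
+ * added to or removed from a netdev (or a VLAN/bond on top of it) into a GID
+ * update on the matching port.  The default GID is never touched here. */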
+static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
+                             struct mlx4_ib_dev *ibdev, union ib_gid *gid)
 {
-       update_ipv6_gids(dev, port, 0);
+       struct mlx4_ib_iboe *iboe;
+       int port = 0;
+       struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
+                               rdma_vlan_dev_real_dev(event_netdev) :
+                               event_netdev;
+       union ib_gid default_gid;
+
+       mlx4_make_default_gid(real_dev, &default_gid);
+
+       if (!memcmp(gid, &default_gid, sizeof(*gid)))
+               return 0;
+
+       if (event != NETDEV_DOWN && event != NETDEV_UP)
+               return 0;
+
+       if ((real_dev != event_netdev) &&
+           (event == NETDEV_DOWN) &&
+           rdma_link_local_addr((struct in6_addr *)gid))
+               return 0;
+
+       iboe = &ibdev->iboe;
+       spin_lock(&iboe->lock);
+
+       for (port = 1; port <= MLX4_MAX_PORTS; ++port)
+               if ((netif_is_bond_master(real_dev) &&
+                    (real_dev == iboe->masters[port - 1])) ||
+                    (!netif_is_bond_master(real_dev) &&
+                    (real_dev == iboe->netdevs[port - 1])))
+                       update_gid_table(ibdev, port, gid,
+                                        event == NETDEV_DOWN, 0);
+
+       spin_unlock(&iboe->lock);
+       return 0;
+
 }
 
-static void netdev_removed(struct mlx4_ib_dev *dev, int port)
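+/* Map a netdev (or the real device behind a VLAN, or a bond master) to the
+ * mlx4 port it is attached to; returns 0 if it is not one of ours. */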
+static u8 mlx4_ib_get_dev_port(struct net_device *dev,
+                              struct mlx4_ib_dev *ibdev)
 {
-       update_ipv6_gids(dev, port, 1);
+       u8 port = 0;
+       struct mlx4_ib_iboe *iboe;
+       struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
+                               rdma_vlan_dev_real_dev(dev) : dev;
+
+       iboe = &ibdev->iboe;
+
+       for (port = 1; port <= MLX4_MAX_PORTS; ++port)
+               if ((netif_is_bond_master(real_dev) &&
+                    (real_dev == iboe->masters[port - 1])) ||
+                    (!netif_is_bond_master(real_dev) &&
+                    (real_dev == iboe->netdevs[port - 1])))
+                       break;
+
+       if ((port == 0) || (port > MLX4_MAX_PORTS))
+               return 0;
+       else
+               return port;
 }
 
-static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
+static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
+                               void *ptr)
+{
+       struct mlx4_ib_dev *ibdev;
+       struct in_ifaddr *ifa = ptr;
+       union ib_gid gid;
+       struct net_device *event_netdev = ifa->ifa_dev->dev;
+
+       ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+
+       ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
+
+       mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
+       return NOTIFY_DONE;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
                                void *ptr)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct mlx4_ib_dev *ibdev;
-       struct net_device *oldnd;
+       struct inet6_ifaddr *ifa = ptr;
+       union  ib_gid *gid = (union ib_gid *)&ifa->addr;
+       struct net_device *event_netdev = ifa->idev->dev;
+
+       ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
+
+       mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
+       return NOTIFY_DONE;
+}
+#endif
+
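+/* Populate the port's GID table from every IPv4 and IPv6 address currently
+ * configured on the given netdev. */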
+static void mlx4_ib_get_dev_addr(struct net_device *dev,
+                                struct mlx4_ib_dev *ibdev, u8 port)
+{
+       struct in_device *in_dev;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct inet6_dev *in6_dev;
+       union ib_gid  *pgid;
+       struct inet6_ifaddr *ifp;
+#endif
+       union ib_gid gid;
+
+
+       if ((port == 0) || (port > MLX4_MAX_PORTS))
+               return;
+
+       /* IPv4 gids */
+       in_dev = in_dev_get(dev);
+       if (in_dev) {
+               for_ifa(in_dev) {
+                       ipv6_addr_set_v4mapped(ifa->ifa_address,
+                                              (struct in6_addr *)&gid);
+                       update_gid_table(ibdev, port, &gid, 0, 0);
+               }
+               endfor_ifa(in_dev);
+               in_dev_put(in_dev);
+       }
+#if IS_ENABLED(CONFIG_IPV6)
+       /* IPv6 gids */
+       in6_dev = in6_dev_get(dev);
+       if (in6_dev) {
+               read_lock_bh(&in6_dev->lock);
+               list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+                       pgid = (union ib_gid *)&ifp->addr;
+                       update_gid_table(ibdev, port, pgid, 0, 0);
+               }
+               read_unlock_bh(&in6_dev->lock);
+               in6_dev_put(in6_dev);
+       }
+#endif
+}
+
+static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
+                                struct net_device *dev, u8 port)
+{
+       union ib_gid gid;
+       mlx4_make_default_gid(dev, &gid);
+       update_gid_table(ibdev, port, &gid, 0, 1);
+}
+
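+/* Device-add time initialization: reset each port's GID table, then rebuild
+ * it from the addresses already assigned to the matching netdevs. */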
+static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
+{
+       struct net_device *dev;
+       struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+       int i;
+
+       for (i = 1; i <= ibdev->num_ports; ++i)
+               if (reset_gid_table(ibdev, i))
+                       return -1;
+
+       read_lock(&dev_base_lock);
+       spin_lock(&iboe->lock);
+
+       for_each_netdev(&init_net, dev) {
+               u8 port = mlx4_ib_get_dev_port(dev, ibdev);
+               if (port)
+                       mlx4_ib_get_dev_addr(dev, ibdev, port);
+       }
+
+       spin_unlock(&iboe->lock);
+       read_unlock(&dev_base_lock);
+
+       return 0;
+}
+
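+/* Refresh the cached netdev and bond-master pointers for every IBoE port,
+ * reprogram the default GIDs, and import the master's addresses when a bond
+ * master is seen for the first time. */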
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev)
+{
        struct mlx4_ib_iboe *iboe;
        int port;
 
-       if (!net_eq(dev_net(dev), &init_net))
-               return NOTIFY_DONE;
-
-       ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
        iboe = &ibdev->iboe;
 
        spin_lock(&iboe->lock);
        mlx4_foreach_ib_transport_port(port, ibdev->dev) {
-               oldnd = iboe->netdevs[port - 1];
+               struct net_device *old_master = iboe->masters[port - 1];
+               struct net_device *curr_master;
                iboe->netdevs[port - 1] =
                        mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
-               if (oldnd != iboe->netdevs[port - 1]) {
-                       if (iboe->netdevs[port - 1])
-                               netdev_added(ibdev, port);
-                       else
-                               netdev_removed(ibdev, port);
+               if (iboe->netdevs[port - 1])
+                       mlx4_ib_set_default_gid(ibdev,
+                                               iboe->netdevs[port - 1], port);
+               if (iboe->netdevs[port - 1] &&
+                   netif_is_bond_slave(iboe->netdevs[port - 1])) {
+                       iboe->masters[port - 1] = netdev_master_upper_dev_get(
+                               iboe->netdevs[port - 1]);
                }
-       }
+               curr_master = iboe->masters[port - 1];
 
-       if (dev == iboe->netdevs[0] ||
-           (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
-               handle_en_event(ibdev, 1, event);
-       else if (dev == iboe->netdevs[1]
-                || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
-               handle_en_event(ibdev, 2, event);
+               /* With bonding, the master may only be recorded after an IP
+                * address has already been assigned to the bond interface,
+                * so import the master's addresses here. */
+               if (curr_master && (old_master != curr_master))
+                       mlx4_ib_get_dev_addr(curr_master, ibdev, port);
+       }
 
        spin_unlock(&iboe->lock);
+}
+
+static int mlx4_ib_netdev_event(struct notifier_block *this,
+                               unsigned long event, void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct mlx4_ib_dev *ibdev;
+
+       if (!net_eq(dev_net(dev), &init_net))
+               return NOTIFY_DONE;
+
+       ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+       mlx4_ib_scan_netdevs(ibdev);
 
        return NOTIFY_DONE;
 }
@@ -1533,6 +1853,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        int i, j;
        int err;
        struct mlx4_ib_iboe *iboe;
+       int ib_num_ports = 0;
 
        pr_info_once("%s", mlx4_ib_version);
 
@@ -1682,6 +2003,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        }
 
        if (check_flow_steering_support(dev)) {
+               ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
                ibdev->ib_dev.create_flow       = mlx4_ib_create_flow;
                ibdev->ib_dev.destroy_flow      = mlx4_ib_destroy_flow;
 
@@ -1707,11 +2029,42 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                                ibdev->counters[i] = -1;
        }
 
+       mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+               ib_num_ports++;
+
        spin_lock_init(&ibdev->sm_lock);
        mutex_init(&ibdev->cap_mask_mutex);
 
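+       /* With device-managed flow steering and IB ports present, reserve an
+        * aligned range of QP numbers for steerable UC QPs; their IB L2
+        * steering rules are keyed on the QP number. */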
+       if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+           ib_num_ports) {
+               ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+               err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+                                           MLX4_IB_UC_STEER_QPN_ALIGN,
+                                           &ibdev->steer_qpn_base);
+               if (err)
+                       goto err_counter;
+
+               ibdev->ib_uc_qpns_bitmap =
+                       kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+                               sizeof(long),
+                               GFP_KERNEL);
+               if (!ibdev->ib_uc_qpns_bitmap) {
+                       dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+                       goto err_steer_qp_release;
+               }
+
+               bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+               err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+                               dev, ibdev->steer_qpn_base,
+                               ibdev->steer_qpn_base +
+                               ibdev->steer_qpn_count - 1);
+               if (err)
+                       goto err_steer_free_bitmap;
+       }
+
        if (ib_register_device(&ibdev->ib_dev, NULL))
-               goto err_counter;
+               goto err_steer_free_bitmap;
 
        if (mlx4_ib_mad_init(ibdev))
                goto err_reg;
@@ -1719,11 +2072,37 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (mlx4_ib_init_sriov(ibdev))
                goto err_mad;
 
-       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
-               iboe->nb.notifier_call = mlx4_ib_netdev_event;
-               err = register_netdevice_notifier(&iboe->nb);
-               if (err)
-                       goto err_sriov;
+       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+               if (!iboe->nb.notifier_call) {
+                       iboe->nb.notifier_call = mlx4_ib_netdev_event;
+                       err = register_netdevice_notifier(&iboe->nb);
+                       if (err) {
+                               iboe->nb.notifier_call = NULL;
+                               goto err_notif;
+                       }
+               }
+               if (!iboe->nb_inet.notifier_call) {
+                       iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
+                       err = register_inetaddr_notifier(&iboe->nb_inet);
+                       if (err) {
+                               iboe->nb_inet.notifier_call = NULL;
+                               goto err_notif;
+                       }
+               }
+#if IS_ENABLED(CONFIG_IPV6)
+               if (!iboe->nb_inet6.notifier_call) {
+                       iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
+                       err = register_inet6addr_notifier(&iboe->nb_inet6);
+                       if (err) {
+                               iboe->nb_inet6.notifier_call = NULL;
+                               goto err_notif;
+                       }
+               }
+#endif
+               rtnl_lock();
+               mlx4_ib_scan_netdevs(ibdev);
+               rtnl_unlock();
+               mlx4_ib_init_gid_table(ibdev);
        }
 
        for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1749,11 +2128,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        return ibdev;
 
 err_notif:
-       if (unregister_netdevice_notifier(&ibdev->iboe.nb))
-               pr_warn("failure unregistering notifier\n");
+       if (ibdev->iboe.nb.notifier_call) {
+               if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+                       pr_warn("failure unregistering notifier\n");
+               ibdev->iboe.nb.notifier_call = NULL;
+       }
+       if (ibdev->iboe.nb_inet.notifier_call) {
+               if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+                       pr_warn("failure unregistering notifier\n");
+               ibdev->iboe.nb_inet.notifier_call = NULL;
+       }
+#if IS_ENABLED(CONFIG_IPV6)
+       if (ibdev->iboe.nb_inet6.notifier_call) {
+               if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+                       pr_warn("failure unregistering notifier\n");
+               ibdev->iboe.nb_inet6.notifier_call = NULL;
+       }
+#endif
        flush_workqueue(wq);
 
-err_sriov:
        mlx4_ib_close_sriov(ibdev);
 
 err_mad:
@@ -1762,6 +2155,13 @@ err_mad:
 err_reg:
        ib_unregister_device(&ibdev->ib_dev);
 
+err_steer_free_bitmap:
+       kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+       if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+               mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+                                     ibdev->steer_qpn_count);
 err_counter:
        for (; i; --i)
                if (ibdev->counters[i - 1] != -1)
@@ -1782,6 +2182,69 @@ err_dealloc:
        return NULL;
 }
 
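+/* Allocate 'count' QP numbers (rounded up to a power of two) from the range
+ * reserved for steerable UC QPs. */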
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+       int offset;
+
+       WARN_ON(!dev->ib_uc_qpns_bitmap);
+
+       offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+                                        dev->steer_qpn_count,
+                                        get_count_order(count));
+       if (offset < 0)
+               return offset;
+
+       *qpn = dev->steer_qpn_base + offset;
+       return 0;
+}
+
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+       if (!qpn ||
+           dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
+               return;
+
+       BUG_ON(qpn < dev->steer_qpn_base);
+
+       bitmap_release_region(dev->ib_uc_qpns_bitmap,
+                             qpn - dev->steer_qpn_base,
+                             get_count_order(count));
+}
+
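+/* Attach or detach the IB L2 steering rule for a UC QP: a single empty IB
+ * spec whose QP number is filled in by __mlx4_ib_create_flow(). */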
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+                        int is_attach)
+{
+       int err;
+       size_t flow_size;
+       struct ib_flow_attr *flow = NULL;
+       struct ib_flow_spec_ib *ib_spec;
+
+       if (is_attach) {
+               flow_size = sizeof(struct ib_flow_attr) +
+                           sizeof(struct ib_flow_spec_ib);
+               flow = kzalloc(flow_size, GFP_KERNEL);
+               if (!flow)
+                       return -ENOMEM;
+               flow->port = mqp->port;
+               flow->num_of_specs = 1;
+               flow->size = flow_size;
+               ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
+               ib_spec->type = IB_FLOW_SPEC_IB;
+               ib_spec->size = sizeof(struct ib_flow_spec_ib);
+               /* Add an empty rule for IB L2 */
+               memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
+
+               err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
+                                           IB_FLOW_DOMAIN_NIC,
+                                           MLX4_FS_REGULAR,
+                                           &mqp->reg_id);
+       } else {
+               err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+       }
+       kfree(flow);
+       return err;
+}
+
 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 {
        struct mlx4_ib_dev *ibdev = ibdev_ptr;
@@ -1795,6 +2258,26 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
                        pr_warn("failure unregistering notifier\n");
                ibdev->iboe.nb.notifier_call = NULL;
        }
+
+       if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+               mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+                                     ibdev->steer_qpn_count);
+               kfree(ibdev->ib_uc_qpns_bitmap);
+       }
+
+       if (ibdev->iboe.nb_inet.notifier_call) {
+               if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+                       pr_warn("failure unregistering notifier\n");
+               ibdev->iboe.nb_inet.notifier_call = NULL;
+       }
+#if IS_ENABLED(CONFIG_IPV6)
+       if (ibdev->iboe.nb_inet6.notifier_call) {
+               if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+                       pr_warn("failure unregistering notifier\n");
+               ibdev->iboe.nb_inet6.notifier_call = NULL;
+       }
+#endif
+
        iounmap(ibdev->uar_map);
        for (p = 0; p < ibdev->num_ports; ++p)
                if (ibdev->counters[p] != -1)