IB/cma: Honor traffic class from lower netdevice for RoCE
author Parav Pandit <parav@mellanox.com>
Tue, 15 Oct 2019 07:20:58 +0000 (10:20 +0300)
committer Jason Gunthorpe <jgg@mellanox.com>
Tue, 22 Oct 2019 18:56:22 +0000 (15:56 -0300)
When a macvlan netdevice is used for RoCE, consider the tos->prio->tc
mapping as SL using its lower netdevice.

1. If the lower netdevice is a VLAN netdevice, consider the VLAN netdevice
   and its parent netdevice for mapping
2. If the lower netdevice is not a VLAN netdevice, consider tc mapping
   directly from the lower netdevice

Link: https://lore.kernel.org/r/20191015072058.17347-1-leon@kernel.org
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/core/cma.c

index 0e3cf34..c8566a4 100644 (file)
@@ -2827,22 +2827,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
        return 0;
 }
 
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
 {
-       int prio;
        struct net_device *dev;
 
-       prio = rt_tos2priority(tos);
-       dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
+       dev = vlan_dev_real_dev(vlan_ndev);
        if (dev->num_tc)
                return netdev_get_prio_tc_map(dev, prio);
 
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
+       return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+               VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+struct iboe_prio_tc_map {
+       int input_prio;
+       int output_tc;
+       bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+{
+       struct iboe_prio_tc_map *map = data;
+
+       if (is_vlan_dev(dev))
+               map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+       else if (dev->num_tc)
+               map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+       else
+               map->output_tc = 0;
+       /* We are interested only in first level VLAN device, so always
+        * return 1 to stop iterating over next level devices.
+        */
+       map->found = true;
+       return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+       struct iboe_prio_tc_map prio_tc_map = {};
+       int prio = rt_tos2priority(tos);
+
+       /* If VLAN device, get it directly from the VLAN netdev */
        if (is_vlan_dev(ndev))
-               return (vlan_dev_get_egress_qos_mask(ndev, prio) &
-                       VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
-       return 0;
+               return get_vlan_ndev_tc(ndev, prio);
+
+       prio_tc_map.input_prio = prio;
+       rcu_read_lock();
+       netdev_walk_all_lower_dev_rcu(ndev,
+                                     get_lower_vlan_dev_tc,
+                                     &prio_tc_map);
+       rcu_read_unlock();
+       /* If map is found from lower device, use it; Otherwise
+        * continue with the current netdevice to get priority to tc map.
+        */
+       if (prio_tc_map.found)
+               return prio_tc_map.output_tc;
+       else if (ndev->num_tc)
+               return netdev_get_prio_tc_map(ndev, prio);
+       else
+               return 0;
 }
 
 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)