#include "lib/clock.h"
#include "diag/fw_tracer.h"
#include "mlx5_irq.h"
+#include "pci_irq.h"
#include "devlink.h"
#include "en_accel/ipsec.h"
struct mlx5_irq_table *irq_table;
struct mlx5_irq **comp_irqs;
struct mlx5_irq *ctrl_irq;
-#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
-#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
}
spread_done:
rcu_read_unlock();
- ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap);
kfree(cpus);
return ret;
}
return ret;
}
+#ifdef CONFIG_RFS_ACCEL
+static int alloc_rmap(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+ /* rmap is a mapping between irq number and queue number.
+ * Each irq can be assigned only to a single rmap.
+ * Since SFs share IRQs, rmap mapping cannot function correctly
+ * for irqs that are shared between different core/netdev RX rings.
+ * Hence we don't allow netdev rmap for SFs.
+ */
+ if (mlx5_core_is_sf(mdev))
+ return 0;
+
+ eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+ if (!eq_table->rmap)
+ return -ENOMEM;
+ return 0;
+}
+
+static void free_rmap(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+ if (eq_table->rmap) {
+ free_irq_cpu_rmap(eq_table->rmap);
+ eq_table->rmap = NULL;
+ }
+}
+#else
+static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; }
+static void free_rmap(struct mlx5_core_dev *mdev) {}
+#endif
+
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
kfree(eq);
}
comp_irqs_release(dev);
+ free_rmap(dev);
}
static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
int err;
int i;
+ err = alloc_rmap(dev);
+ if (err)
+ return err;
+
ncomp_eqs = comp_irqs_request(dev);
- if (ncomp_eqs < 0)
- return ncomp_eqs;
+ if (ncomp_eqs < 0) {
+ err = ncomp_eqs;
+ goto err_irqs_req;
+ }
+
INIT_LIST_HEAD(&table->comp_eqs_list);
nent = comp_eq_depth_devlink_param_get(dev);
kfree(eq);
clean:
destroy_comp_eqs(dev);
+err_irqs_req:
+ free_rmap(dev);
return err;
}
return ERR_PTR(-ENOENT);
}
-static void clear_rmap(struct mlx5_core_dev *dev)
-{
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-
- free_irq_cpu_rmap(eq_table->rmap);
-#endif
-}
-
-static int set_rmap(struct mlx5_core_dev *mdev)
-{
- int err = 0;
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
- int vecidx;
-
- eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
- if (!eq_table->rmap) {
- err = -ENOMEM;
- mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
- goto err_out;
- }
-
- for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
- err = irq_cpu_rmap_add(eq_table->rmap,
- pci_irq_vector(mdev->pdev, vecidx));
- if (err) {
- mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
- err);
- goto err_irq_cpu_rmap_add;
- }
- }
- return 0;
-
-err_irq_cpu_rmap_add:
- clear_rmap(mdev);
-err_out:
-#endif
- return err;
-}
-
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
}
goto err_async_eqs;
}
- if (!mlx5_core_is_sf(dev)) {
- /* rmap is a mapping between irq number and queue number.
- * each irq can be assign only to a single rmap.
- * since SFs share IRQs, rmap mapping cannot function correctly
- * for irqs that are shared for different core/netdev RX rings.
- * Hence we don't allow netdev rmap for SFs
- */
- err = set_rmap(dev);
- if (err)
- goto err_rmap;
- }
-
err = create_comp_eqs(dev);
if (err) {
mlx5_core_err(dev, "Failed to create completion EQs\n");
}
return 0;
+
err_comp_eqs:
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
-err_rmap:
destroy_async_eqs(dev);
err_async_eqs:
return err;
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
}
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
+#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
static void irq_release(struct mlx5_irq *irq)
{
struct mlx5_irq_pool *pool = irq->pool;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
xa_erase(&pool->irqs, irq->map.index);
- /* free_irq requires that affinity_hint and rmap will be cleared
- * before calling it. This is why there is asymmetry with set_rmap
- * which should be called after alloc_irq but before request_irq.
+ /* free_irq requires that affinity_hint and rmap will be cleared before
+ * calling it. To satisfy this requirement, we call
+ * irq_cpu_rmap_remove() to remove the notifier
*/
irq_update_affinity_hint(irq->map.virq, NULL);
+#ifdef CONFIG_RFS_ACCEL
+ rmap = mlx5_eq_table_get_rmap(pool->dev);
+ if (rmap && irq->map.index)
+ irq_cpu_rmap_remove(rmap, irq->map.virq);
+#endif
+
free_cpumask_var(irq->mask);
free_irq(irq->map.virq, &irq->nh);
+ if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
+ pci_msix_free_irq(pool->dev->pdev, irq->map);
kfree(irq);
}
return;
}
- if (vecidx == pool->xa_num_irqs.max) {
+ if (!vecidx) {
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
return;
}
}
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
- struct irq_affinity_desc *af_desc)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_core_dev *dev = pool->dev;
char name[MLX5_MAX_IRQ_NAME];
irq = kzalloc(sizeof(*irq), GFP_KERNEL);
if (!irq)
return ERR_PTR(-ENOMEM);
- irq->map.virq = pci_irq_vector(dev->pdev, i);
+ if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
+ /* The vector at index 0 was already allocated.
+ * Just get the irq number. If dynamic irq is not supported
+ * vectors have also been allocated.
+ */
+ irq->map.virq = pci_irq_vector(dev->pdev, i);
+ irq->map.index = 0;
+ } else {
+ irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
+ if (!irq->map.virq) {
+ err = irq->map.index;
+ goto err_alloc_irq;
+ }
+ }
+
+ if (i && rmap && *rmap) {
+#ifdef CONFIG_RFS_ACCEL
+ err = irq_cpu_rmap_add(*rmap, irq->map.virq);
+ if (err)
+ goto err_irq_rmap;
+#endif
+ }
if (!mlx5_irq_pool_is_sf_pool(pool))
irq_set_name(pool, name, i);
else
err_cpumask:
free_irq(irq->map.virq, &irq->nh);
err_req_irq:
+#ifdef CONFIG_RFS_ACCEL
+ if (i && rmap && *rmap) {
+ free_irq_cpu_rmap(*rmap);
+ *rmap = NULL;
+ }
+err_irq_rmap:
+#endif
+ if (i && pci_msix_can_alloc_dyn(dev->pdev))
+ pci_msix_free_irq(dev->pdev, irq->map);
+err_alloc_irq:
kfree(irq);
return ERR_PTR(err);
}
/* requesting an irq from a given pool according to given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
- struct irq_affinity_desc *af_desc)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_irq *irq;
mlx5_irq_get_locked(irq);
goto unlock;
}
- irq = mlx5_irq_alloc(pool, vecidx, af_desc);
+ irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
mutex_unlock(&pool->lock);
return irq;
/* In case we only have a single IRQ for PF/VF */
cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
}
- /* Allocate the IRQ in the last index of the pool */
- irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, &af_desc);
+ /* Allocate the IRQ in index 0. The vector was already allocated */
+ irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
} else {
irq = mlx5_irq_affinity_request(pool, &af_desc);
}
* @vecidx: vector index of the IRQ. This argument is ignore if affinity is
* provided.
* @af_desc: affinity descriptor for this IRQ.
+ * @rmap: pointer to reverse map pointer for completion interrupts
*
* This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
- struct irq_affinity_desc *af_desc)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool;
struct mlx5_irq *irq;
pool = irq_table->pcif_pool;
- irq = irq_pool_request_vector(pool, vecidx, af_desc);
+ irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
if (IS_ERR(irq))
return irq;
mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
* @cpus: CPUs array for binding the IRQs
* @nirqs: number of IRQs to request.
* @irqs: an output array of IRQs pointers.
+ * @rmap: pointer to reverse map pointer for completion interrupts
*
* Each IRQ is bound to at most 1 CPU.
* This function is requests nirqs IRQs, starting from @vecidx.
* @nirqs), if successful, or a negative error code in case of an error.
*/
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
- struct mlx5_irq **irqs)
+ struct mlx5_irq **irqs, struct cpu_rmap **rmap)
{
struct irq_affinity_desc af_desc;
struct mlx5_irq *irq;
af_desc.is_managed = 1;
for (i = 0; i < nirqs; i++) {
cpumask_set_cpu(cpus[i], &af_desc.mask);
- irq = mlx5_irq_request(dev, i, &af_desc);
+ irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
if (IS_ERR(irq))
break;
cpumask_clear(&af_desc.mask);
1 << MLX5_CAP_GEN(dev, log_max_eq);
int total_vec;
int pcif_vec;
+ int req_vec;
int err;
+ int n;
if (mlx5_core_is_sf(dev))
return 0;
if (mlx5_sf_max_functions(dev))
total_vec += MLX5_IRQ_CTRL_SF_MAX +
MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
+ total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
+ pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
- total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
- if (total_vec < 0)
- return total_vec;
- pcif_vec = min(pcif_vec, total_vec);
+ req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
+ n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
+ if (n < 0)
+ return n;
err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
if (err)