drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c

   1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2 /* Copyright (c) 2019 Mellanox Technologies. */
   3
   4 #include <linux/pci.h>
   5 #include <linux/interrupt.h>
   6 #include <linux/notifier.h>
   7 #include <linux/mlx5/driver.h>
   8 #include <linux/mlx5/vport.h>
   9 #include "mlx5_core.h"
  10 #include "mlx5_irq.h"
  11 #include "pci_irq.h"
  12 #include "lib/sf.h"
  13 #include "lib/eq.h"
  14 #ifdef CONFIG_RFS_ACCEL
  15 #include <linux/cpu_rmap.h>
  16 #endif
  17
/* Number of SFs accounted per SF control IRQ when sizing the SF ctrl pool */
#define MLX5_SFS_PER_CTRL_IRQ 64
/* Upper bound on the number of SF control IRQs */
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2

/* Min/max thresholds passed to irq_pool_alloc() for completion (COMP) and
 * control (CTRL) IRQ pools.
 */
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
  27
/* A single interrupt owned by an IRQ pool. */
struct mlx5_irq {
	/* notifier chain invoked from irq_int_handler(); also the dev_id
	 * cookie given to request_irq()/free_irq()
	 */
	struct atomic_notifier_head nh;
	/* affinity mask copied from the affinity descriptor at allocation */
	cpumask_var_t mask;
	/* name registered with request_irq() */
	char name[MLX5_MAX_IRQ_NAME];
	/* pool this IRQ belongs to */
	struct mlx5_irq_pool *pool;
	/* number of users; modified under pool->lock */
	int refcount;
	/* Linux IRQ number (virq) and MSI-X index */
	struct msi_map map;
	/* key of this IRQ in pool->irqs */
	u32 pool_index;
};
  37
/* Per-device collection of IRQ pools. The SF pools are only set up by
 * irq_pools_init() when SFs are supported and enough vectors are available.
 */
struct mlx5_irq_table {
	/* pool of the PCI function itself */
	struct mlx5_irq_pool *pcif_pool;
	/* pool of control IRQs used by SFs */
	struct mlx5_irq_pool *sf_ctrl_pool;
	/* pool of completion IRQs used by SFs */
	struct mlx5_irq_pool *sf_comp_pool;
};
  43
  44 /**
  45  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
  46  *                                   to be ssigned to each VF.
  47  * @dev: PF to work on
  48  * @num_vfs: Number of enabled VFs
  49  */
  50 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
  51 {
  52         int num_vf_msix, min_msix, max_msix;
  53
  54         num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
  55         if (!num_vf_msix)
  56                 return 0;
  57
  58         min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
  59         max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
  60
  61         /* Limit maximum number of MSI-X vectors so the default configuration
  62          * has some available in the pool. This will allow the user to increase
  63          * the number of vectors in a VF without having to first size-down other
  64          * VFs.
  65          */
  66         return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
  67 }
  68
  69 /**
  70  * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
  71  * @dev: PF to work on
  72  * @function_id: Internal PCI VF function IDd
  73  * @msix_vec_count: Number of MSI-X vectors to set
  74  */
  75 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
  76                             int msix_vec_count)
  77 {
  78         int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
  79         int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
  80         void *hca_cap = NULL, *query_cap = NULL, *cap;
  81         int num_vf_msix, min_msix, max_msix;
  82         int ret;
  83
  84         num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
  85         if (!num_vf_msix)
  86                 return 0;
  87
  88         if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
  89                 return -EOPNOTSUPP;
  90
  91         min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
  92         max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
  93
  94         if (msix_vec_count < min_msix)
  95                 return -EINVAL;
  96
  97         if (msix_vec_count > max_msix)
  98                 return -EOVERFLOW;
  99
 100         query_cap = kvzalloc(query_sz, GFP_KERNEL);
 101         hca_cap = kvzalloc(set_sz, GFP_KERNEL);
 102         if (!hca_cap || !query_cap) {
 103                 ret = -ENOMEM;
 104                 goto out;
 105         }
 106
 107         ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap);
 108         if (ret)
 109                 goto out;
 110
 111         cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
 112         memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
 113                MLX5_UN_SZ_BYTES(hca_cap_union));
 114         MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
 115
 116         MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
 117         MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
 118         MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
 119
 120         MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
 121                  MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
 122         ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
 123 out:
 124         kvfree(hca_cap);
 125         kvfree(query_cap);
 126         return ret;
 127 }
 128
/* Tear down @irq and free its memory: remove it from the pool's xarray,
 * clear the affinity hint and (with CONFIG_RFS_ACCEL) the rmap notifier,
 * free the Linux IRQ and, for a dynamically allocated vector, the MSI-X
 * entry. mlx5_irq_put() calls this under pool->lock once the refcount
 * drops to zero; irq_pool_free() calls it while destroying a pool.
 */
static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	xa_erase(&pool->irqs, irq->pool_index);
	/* free_irq requires that affinity_hint and rmap will be cleared before
	 * calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap && irq->map.index)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_cpumask_var(irq->mask);
	free_irq(irq->map.virq, &irq->nh);
	/* index 0 is never handed back to the PCI core from here */
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
	kfree(irq);
}
 154
 155 int mlx5_irq_put(struct mlx5_irq *irq)
 156 {
 157         struct mlx5_irq_pool *pool = irq->pool;
 158         int ret = 0;
 159
 160         mutex_lock(&pool->lock);
 161         irq->refcount--;
 162         if (!irq->refcount) {
 163                 irq_release(irq);
 164                 ret = 1;
 165         }
 166         mutex_unlock(&pool->lock);
 167         return ret;
 168 }
 169
 170 int mlx5_irq_read_locked(struct mlx5_irq *irq)
 171 {
 172         lockdep_assert_held(&irq->pool->lock);
 173         return irq->refcount;
 174 }
 175
 176 int mlx5_irq_get_locked(struct mlx5_irq *irq)
 177 {
 178         lockdep_assert_held(&irq->pool->lock);
 179         if (WARN_ON_ONCE(!irq->refcount))
 180                 return 0;
 181         irq->refcount++;
 182         return 1;
 183 }
 184
 185 static int irq_get(struct mlx5_irq *irq)
 186 {
 187         int err;
 188
 189         mutex_lock(&irq->pool->lock);
 190         err = mlx5_irq_get_locked(irq);
 191         mutex_unlock(&irq->pool->lock);
 192         return err;
 193 }
 194
 195 static irqreturn_t irq_int_handler(int irq, void *nh)
 196 {
 197         atomic_notifier_call_chain(nh, 0, NULL);
 198         return IRQ_HANDLED;
 199 }
 200
 201 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 202 {
 203         snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
 204 }
 205
 206 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 207 {
 208         if (!pool->xa_num_irqs.max) {
 209                 /* in case we only have a single irq for the device */
 210                 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
 211                 return;
 212         }
 213
 214         if (!vecidx) {
 215                 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
 216                 return;
 217         }
 218
 219         snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 220 }
 221
 222 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
 223                                 struct irq_affinity_desc *af_desc,
 224                                 struct cpu_rmap **rmap)
 225 {
 226         struct mlx5_core_dev *dev = pool->dev;
 227         char name[MLX5_MAX_IRQ_NAME];
 228         struct mlx5_irq *irq;
 229         int err;
 230
 231         irq = kzalloc(sizeof(*irq), GFP_KERNEL);
 232         if (!irq)
 233                 return ERR_PTR(-ENOMEM);
 234         if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
 235                 /* The vector at index 0 was already allocated.
 236                  * Just get the irq number. If dynamic irq is not supported
 237                  * vectors have also been allocated.
 238                  */
 239                 irq->map.virq = pci_irq_vector(dev->pdev, i);
 240                 irq->map.index = 0;
 241         } else {
 242                 irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
 243                 if (!irq->map.virq) {
 244                         err = irq->map.index;
 245                         goto err_alloc_irq;
 246                 }
 247         }
 248
 249         if (i && rmap && *rmap) {
 250 #ifdef CONFIG_RFS_ACCEL
 251                 err = irq_cpu_rmap_add(*rmap, irq->map.virq);
 252                 if (err)
 253                         goto err_irq_rmap;
 254 #endif
 255         }
 256         if (!mlx5_irq_pool_is_sf_pool(pool))
 257                 irq_set_name(pool, name, i);
 258         else
 259                 irq_sf_set_name(pool, name, i);
 260         ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
 261         snprintf(irq->name, MLX5_MAX_IRQ_NAME,
 262                  "%s@pci:%s", name, pci_name(dev->pdev));
 263         err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
 264                           &irq->nh);
 265         if (err) {
 266                 mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
 267                 goto err_req_irq;
 268         }
 269         if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
 270                 mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
 271                 err = -ENOMEM;
 272                 goto err_cpumask;
 273         }
 274         if (af_desc) {
 275                 cpumask_copy(irq->mask, &af_desc->mask);
 276                 irq_set_affinity_and_hint(irq->map.virq, irq->mask);
 277         }
 278         irq->pool = pool;
 279         irq->refcount = 1;
 280         irq->pool_index = i;
 281         err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
 282         if (err) {
 283                 mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
 284                               irq->pool_index, err);
 285                 goto err_xa;
 286         }
 287         return irq;
 288 err_xa:
 289         if (af_desc)
 290                 irq_update_affinity_hint(irq->map.virq, NULL);
 291         free_cpumask_var(irq->mask);
 292 err_cpumask:
 293         free_irq(irq->map.virq, &irq->nh);
 294 err_req_irq:
 295 #ifdef CONFIG_RFS_ACCEL
 296         if (i && rmap && *rmap) {
 297                 free_irq_cpu_rmap(*rmap);
 298                 *rmap = NULL;
 299         }
 300 err_irq_rmap:
 301 #endif
 302         if (i && pci_msix_can_alloc_dyn(dev->pdev))
 303                 pci_msix_free_irq(dev->pdev, irq->map);
 304 err_alloc_irq:
 305         kfree(irq);
 306         return ERR_PTR(err);
 307 }
 308
 309 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 310 {
 311         int ret;
 312
 313         ret = irq_get(irq);
 314         if (!ret)
 315                 /* Something very bad happens here, we are enabling EQ
 316                  * on non-existing IRQ.
 317                  */
 318                 return -ENOENT;
 319         ret = atomic_notifier_chain_register(&irq->nh, nb);
 320         if (ret)
 321                 mlx5_irq_put(irq);
 322         return ret;
 323 }
 324
 325 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 326 {
 327         int err = 0;
 328
 329         err = atomic_notifier_chain_unregister(&irq->nh, nb);
 330         mlx5_irq_put(irq);
 331         return err;
 332 }
 333
 334 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 335 {
 336         return irq->mask;
 337 }
 338
 339 int mlx5_irq_get_index(struct mlx5_irq *irq)
 340 {
 341         return irq->map.index;
 342 }
 343
 344 /* irq_pool API */
 345
 346 /* requesting an irq from a given pool according to given index */
 347 static struct mlx5_irq *
 348 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
 349                         struct irq_affinity_desc *af_desc,
 350                         struct cpu_rmap **rmap)
 351 {
 352         struct mlx5_irq *irq;
 353
 354         mutex_lock(&pool->lock);
 355         irq = xa_load(&pool->irqs, vecidx);
 356         if (irq) {
 357                 mlx5_irq_get_locked(irq);
 358                 goto unlock;
 359         }
 360         irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
 361 unlock:
 362         mutex_unlock(&pool->lock);
 363         return irq;
 364 }
 365
 366 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
 367 {
 368         return irq_table->sf_ctrl_pool;
 369 }
 370
 371 static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
 372 {
 373         return irq_table->sf_comp_pool;
 374 }
 375
 376 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
 377 {
 378         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 379         struct mlx5_irq_pool *pool = NULL;
 380
 381         if (mlx5_core_is_sf(dev))
 382                 pool = sf_irq_pool_get(irq_table);
 383
 384         /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
 385          * the PF IRQs pool in case the SF pool doesn't exist.
 386          */
 387         return pool ? pool : irq_table->pcif_pool;
 388 }
 389
 390 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
 391 {
 392         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 393         struct mlx5_irq_pool *pool = NULL;
 394
 395         if (mlx5_core_is_sf(dev))
 396                 pool = sf_ctrl_irq_pool_get(irq_table);
 397
 398         /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
 399          * the PF IRQs pool in case the SF pool doesn't exist.
 400          */
 401         return pool ? pool : irq_table->pcif_pool;
 402 }
 403
 404 /**
 405  * mlx5_irqs_release - release one or more IRQs back to the system.
 406  * @irqs: IRQs to be released.
 407  * @nirqs: number of IRQs to be released.
 408  */
 409 static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
 410 {
 411         int i;
 412
 413         for (i = 0; i < nirqs; i++) {
 414                 synchronize_irq(irqs[i]->map.virq);
 415                 mlx5_irq_put(irqs[i]);
 416         }
 417 }
 418
 419 /**
 420  * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 421  * @ctrl_irq: ctrl IRQ to be released.
 422  */
 423 void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
 424 {
 425         mlx5_irqs_release(&ctrl_irq, 1);
 426 }
 427
 428 /**
 429  * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 430  * @dev: mlx5 device that requesting the IRQ.
 431  *
 432  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 433  */
 434 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
 435 {
 436         struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
 437         struct irq_affinity_desc af_desc;
 438         struct mlx5_irq *irq;
 439
 440         cpumask_copy(&af_desc.mask, cpu_online_mask);
 441         af_desc.is_managed = false;
 442         if (!mlx5_irq_pool_is_sf_pool(pool)) {
 443                 /* In case we are allocating a control IRQ from a pci device's pool.
 444                  * This can happen also for a SF if the SFs pool is empty.
 445                  */
 446                 if (!pool->xa_num_irqs.max) {
 447                         cpumask_clear(&af_desc.mask);
 448                         /* In case we only have a single IRQ for PF/VF */
 449                         cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
 450                 }
 451                 /* Allocate the IRQ in index 0. The vector was already allocated */
 452                 irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
 453         } else {
 454                 irq = mlx5_irq_affinity_request(pool, &af_desc);
 455         }
 456
 457         return irq;
 458 }
 459
 460 /**
 461  * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 462  * @dev: mlx5 device that requesting the IRQ.
 463  * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
 464  * provided.
 465  * @af_desc: affinity descriptor for this IRQ.
 466  * @rmap: pointer to reverse map pointer for completion interrupts
 467  *
 468  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 469  */
 470 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 471                                   struct irq_affinity_desc *af_desc,
 472                                   struct cpu_rmap **rmap)
 473 {
 474         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 475         struct mlx5_irq_pool *pool;
 476         struct mlx5_irq *irq;
 477
 478         pool = irq_table->pcif_pool;
 479         irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
 480         if (IS_ERR(irq))
 481                 return irq;
 482         mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
 483                       irq->map.virq, cpumask_pr_args(&af_desc->mask),
 484                       irq->refcount / MLX5_EQ_REFS_PER_IRQ);
 485         return irq;
 486 }
 487
 488 /**
 489  * mlx5_msix_alloc - allocate msix interrupt
 490  * @dev: mlx5 device from which to request
 491  * @handler: interrupt handler
 492  * @affdesc: affinity descriptor
 493  * @name: interrupt name
 494  *
 495  * Returns: struct msi_map with result encoded.
 496  * Note: the caller must make sure to release the irq by calling
 497  *       mlx5_msix_free() if shutdown was initiated.
 498  */
 499 struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
 500                                irqreturn_t (*handler)(int, void *),
 501                                const struct irq_affinity_desc *affdesc,
 502                                const char *name)
 503 {
 504         struct msi_map map;
 505         int err;
 506
 507         if (!dev->pdev) {
 508                 map.virq = 0;
 509                 map.index = -EINVAL;
 510                 return map;
 511         }
 512
 513         map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
 514         if (!map.virq)
 515                 return map;
 516
 517         err = request_irq(map.virq, handler, 0, name, NULL);
 518         if (err) {
 519                 mlx5_core_warn(dev, "err %d\n", err);
 520                 pci_msix_free_irq(dev->pdev, map);
 521                 map.virq = 0;
 522                 map.index = -ENOMEM;
 523         }
 524         return map;
 525 }
 526 EXPORT_SYMBOL(mlx5_msix_alloc);
 527
 528 /**
 529  * mlx5_msix_free - free a previously allocated msix interrupt
 530  * @dev: mlx5 device associated with interrupt
 531  * @map: map previously returned by mlx5_msix_alloc()
 532  */
 533 void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
 534 {
 535         free_irq(map.virq, NULL);
 536         pci_msix_free_irq(dev->pdev, map);
 537 }
 538 EXPORT_SYMBOL(mlx5_msix_free);
 539
 540 /**
 541  * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
 542  * @irqs: IRQs to be released.
 543  * @nirqs: number of IRQs to be released.
 544  */
 545 void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
 546 {
 547         mlx5_irqs_release(irqs, nirqs);
 548 }
 549
 550 /**
 551  * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
 552  * @dev: mlx5 device that is requesting the IRQs.
 553  * @cpus: CPUs array for binding the IRQs
 554  * @nirqs: number of IRQs to request.
 555  * @irqs: an output array of IRQs pointers.
 556  * @rmap: pointer to reverse map pointer for completion interrupts
 557  *
 558  * Each IRQ is bound to at most 1 CPU.
 559  * This function is requests nirqs IRQs, starting from @vecidx.
 560  *
 561  * This function returns the number of IRQs requested, (which might be smaller than
 562  * @nirqs), if successful, or a negative error code in case of an error.
 563  */
 564 int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
 565                               struct mlx5_irq **irqs, struct cpu_rmap **rmap)
 566 {
 567         struct irq_affinity_desc af_desc;
 568         struct mlx5_irq *irq;
 569         int i;
 570
 571         af_desc.is_managed = false;
 572         for (i = 0; i < nirqs; i++) {
 573                 cpumask_set_cpu(cpus[i], &af_desc.mask);
 574                 irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
 575                 if (IS_ERR(irq))
 576                         break;
 577                 cpumask_clear(&af_desc.mask);
 578                 irqs[i] = irq;
 579         }
 580
 581         return i ? i : PTR_ERR(irq);
 582 }
 583
 584 static struct mlx5_irq_pool *
 585 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
 586                u32 min_threshold, u32 max_threshold)
 587 {
 588         struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
 589
 590         if (!pool)
 591                 return ERR_PTR(-ENOMEM);
 592         pool->dev = dev;
 593         mutex_init(&pool->lock);
 594         xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
 595         pool->xa_num_irqs.min = start;
 596         pool->xa_num_irqs.max = start + size - 1;
 597         if (name)
 598                 snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
 599                          "%s", name);
 600         pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
 601         pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
 602         mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
 603                       name, size, start);
 604         return pool;
 605 }
 606
 607 static void irq_pool_free(struct mlx5_irq_pool *pool)
 608 {
 609         struct mlx5_irq *irq;
 610         unsigned long index;
 611
 612         /* There are cases in which we are destrying the irq_table before
 613          * freeing all the IRQs, fast teardown for example. Hence, free the irqs
 614          * which might not have been freed.
 615          */
 616         xa_for_each(&pool->irqs, index, irq)
 617                 irq_release(irq);
 618         xa_destroy(&pool->irqs);
 619         mutex_destroy(&pool->lock);
 620         kfree(pool->irqs_per_cpu);
 621         kvfree(pool);
 622 }
 623
/* Create the IRQ pools of @dev: always the PCI function pool with @pcif_vec
 * vectors and, when SFs are supported and @sf_vec is large enough, the SF
 * control and SF completion pools that split the @sf_vec vectors following
 * the PCI function ones.
 *
 * Returns 0 on success or a negative errno; on failure every pool created
 * so far is freed.
 */
static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int num_sf_ctrl_by_msix;
	int num_sf_ctrl_by_sfs;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	/* no SFs on this device: the PCI function pool is all there is */
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	/* the ctrl pool size is the smallest of: the share allowed by the
	 * available vectors, the share needed by the SF count, and
	 * MLX5_IRQ_CTRL_SF_MAX
	 */
	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
					  MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	/* init sf_comp_pool */
	/* the completion pool takes the SF vectors left after the ctrl ones */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	/* one u16 counter per possible CPU id */
	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}
 685
 686 static void irq_pools_destroy(struct mlx5_irq_table *table)
 687 {
 688         if (table->sf_ctrl_pool) {
 689                 irq_pool_free(table->sf_comp_pool);
 690                 irq_pool_free(table->sf_ctrl_pool);
 691         }
 692         irq_pool_free(table->pcif_pool);
 693 }
 694
 695 static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
 696 {
 697         struct mlx5_irq *irq;
 698         unsigned long index;
 699
 700         xa_for_each(&pool->irqs, index, irq)
 701                 free_irq(irq->map.virq, &irq->nh);
 702 }
 703
 704 static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
 705 {
 706         if (table->sf_ctrl_pool) {
 707                 mlx5_irq_pool_free_irqs(table->sf_comp_pool);
 708                 mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
 709         }
 710         mlx5_irq_pool_free_irqs(table->pcif_pool);
 711 }
 712
 713 /* irq_table API */
 714
 715 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
 716 {
 717         struct mlx5_irq_table *irq_table;
 718
 719         if (mlx5_core_is_sf(dev))
 720                 return 0;
 721
 722         irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
 723                                   dev->priv.numa_node);
 724         if (!irq_table)
 725                 return -ENOMEM;
 726
 727         dev->priv.irq_table = irq_table;
 728         return 0;
 729 }
 730
 731 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
 732 {
 733         if (mlx5_core_is_sf(dev))
 734                 return;
 735
 736         kvfree(dev->priv.irq_table);
 737 }
 738
 739 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
 740 {
 741         if (!table->pcif_pool->xa_num_irqs.max)
 742                 return 1;
 743         return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
 744 }
 745
/* Size the IRQ pools of @dev and allocate its MSI-X vectors. No-op for an
 * SF.
 *
 * Returns 0 on success or a negative errno.
 */
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	/* use max_num_eqs when the device reports it, else 2^log_max_eq */
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	/* one vector per port per online CPU plus one more, capped by the
	 * number of EQs
	 */
	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_IRQ_CTRL_SF_MAX +
			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
	/* neither count may exceed what the PCI device exposes */
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	/* with dynamic MSI-X only one vector is allocated up front; the
	 * others are allocated on demand in mlx5_irq_alloc()
	 */
	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	/* whatever is left beyond the PCI function vectors goes to the SFs */
	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}
 781
 782 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 783 {
 784         struct mlx5_irq_table *table = dev->priv.irq_table;
 785
 786         if (mlx5_core_is_sf(dev))
 787                 return;
 788
 789         /* There are cases where IRQs still will be in used when we reaching
 790          * to here. Hence, making sure all the irqs are released.
 791          */
 792         irq_pools_destroy(table);
 793         pci_free_irq_vectors(dev->pdev);
 794 }
 795
 796 void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
 797 {
 798         struct mlx5_irq_table *table = dev->priv.irq_table;
 799
 800         if (mlx5_core_is_sf(dev))
 801                 return;
 802
 803         mlx5_irq_pools_free_irqs(table);
 804         pci_free_irq_vectors(dev->pdev);
 805 }
 806
 807 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
 808 {
 809         if (table->sf_comp_pool)
 810                 return min_t(int, num_online_cpus(),
 811                              table->sf_comp_pool->xa_num_irqs.max -
 812                              table->sf_comp_pool->xa_num_irqs.min + 1);
 813         else
 814                 return mlx5_irq_table_get_num_comp(table);
 815 }
 816
 817 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
 818 {
 819 #ifdef CONFIG_MLX5_SF
 820         if (mlx5_core_is_sf(dev))
 821                 return dev->priv.parent_mdev->priv.irq_table;
 822 #endif
 823         return dev->priv.irq_table;
 824 }