drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c

   1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2 /* Copyright (c) 2019 Mellanox Technologies. */
   3
   4 #include <linux/pci.h>
   5 #include <linux/interrupt.h>
   6 #include <linux/notifier.h>
   7 #include <linux/mlx5/driver.h>
   8 #include <linux/mlx5/vport.h>
   9 #include "mlx5_core.h"
  10 #include "mlx5_irq.h"
  11 #include "pci_irq.h"
  12 #include "lib/sf.h"
  13 #include "lib/eq.h"
  14 #ifdef CONFIG_RFS_ACCEL
  15 #include <linux/cpu_rmap.h>
  16 #endif
  17
/* Divisor used by irq_pools_init() to derive how many SF control IRQs are
 * needed from the maximum number of SFs.
 */
#define MLX5_SFS_PER_CTRL_IRQ 64
/* Upper bound on the number of SF control IRQs (see irq_pools_init() and
 * mlx5_irq_table_create()).
 */
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2

/* Min/max sharing thresholds passed to irq_pool_alloc() for the completion
 * (COMP) and control (CTRL) pools; irq_pool_alloc() multiplies them by
 * MLX5_EQ_REFS_PER_IRQ before storing them in the pool.
 */
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
  27
/* One requested interrupt vector, owned by an mlx5_irq_pool. */
struct mlx5_irq {
        /* notifier chain run by irq_int_handler(); its address is also the
         * dev_id cookie given to request_irq()/free_irq()
         */
        struct atomic_notifier_head nh;
        /* affinity mask; filled from the af_desc given at allocation time and
         * exposed through mlx5_irq_get_affinity_mask()
         */
        cpumask_var_t mask;
        /* "<name>@pci:<pci device name>", the name handed to request_irq() */
        char name[MLX5_MAX_IRQ_NAME];
        /* pool this IRQ belongs to */
        struct mlx5_irq_pool *pool;
        /* reference count, accessed under pool->lock; the IRQ is released
         * when it drops to zero (see mlx5_irq_put())
         */
        int refcount;
        /* MSI index and Linux IRQ number of the vector */
        struct msi_map map;
        /* key under which this IRQ is stored in pool->irqs */
        u32 pool_index;
};
  37
/* Per-device set of IRQ pools. The SF pools are only set up by
 * irq_pools_init() when SFs are supported and enough vectors are available;
 * otherwise SF lookups fall back to pcif_pool.
 */
struct mlx5_irq_table {
        /* pool for the PCI function's own vectors */
        struct mlx5_irq_pool *pcif_pool;
        /* pool of SF control vectors, may be NULL */
        struct mlx5_irq_pool *sf_ctrl_pool;
        /* pool of SF completion vectors, may be NULL */
        struct mlx5_irq_pool *sf_comp_pool;
};
  43
  44 /**
  45  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
  46  *                                   to be ssigned to each VF.
  47  * @dev: PF to work on
  48  * @num_vfs: Number of enabled VFs
  49  */
  50 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
  51 {
  52         int num_vf_msix, min_msix, max_msix;
  53
  54         num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
  55         if (!num_vf_msix)
  56                 return 0;
  57
  58         min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
  59         max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
  60
  61         /* Limit maximum number of MSI-X vectors so the default configuration
  62          * has some available in the pool. This will allow the user to increase
  63          * the number of vectors in a VF without having to first size-down other
  64          * VFs.
  65          */
  66         return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
  67 }
  68
  69 /**
  70  * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
  71  * @dev: PF to work on
  72  * @function_id: Internal PCI VF function IDd
  73  * @msix_vec_count: Number of MSI-X vectors to set
  74  */
  75 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
  76                             int msix_vec_count)
  77 {
  78         int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
  79         int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
  80         void *hca_cap = NULL, *query_cap = NULL, *cap;
  81         int num_vf_msix, min_msix, max_msix;
  82         int ret;
  83
  84         num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
  85         if (!num_vf_msix)
  86                 return 0;
  87
  88         if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
  89                 return -EOPNOTSUPP;
  90
  91         min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
  92         max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
  93
  94         if (msix_vec_count < min_msix)
  95                 return -EINVAL;
  96
  97         if (msix_vec_count > max_msix)
  98                 return -EOVERFLOW;
  99
 100         query_cap = kvzalloc(query_sz, GFP_KERNEL);
 101         hca_cap = kvzalloc(set_sz, GFP_KERNEL);
 102         if (!hca_cap || !query_cap) {
 103                 ret = -ENOMEM;
 104                 goto out;
 105         }
 106
 107         ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap);
 108         if (ret)
 109                 goto out;
 110
 111         cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
 112         memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
 113                MLX5_UN_SZ_BYTES(hca_cap_union));
 114         MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
 115
 116         MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
 117         MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
 118         MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
 119
 120         MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
 121                  MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
 122         ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
 123 out:
 124         kvfree(hca_cap);
 125         kvfree(query_cap);
 126         return ret;
 127 }
 128
 129 static void irq_release(struct mlx5_irq *irq)
 130 {
 131         struct mlx5_irq_pool *pool = irq->pool;
 132 #ifdef CONFIG_RFS_ACCEL
 133         struct cpu_rmap *rmap;
 134 #endif
 135
 136         xa_erase(&pool->irqs, irq->pool_index);
 137         /* free_irq requires that affinity_hint and rmap will be cleared before
 138          * calling it. To satisfy this requirement, we call
 139          * irq_cpu_rmap_remove() to remove the notifier
 140          */
 141         irq_update_affinity_hint(irq->map.virq, NULL);
 142 #ifdef CONFIG_RFS_ACCEL
 143         rmap = mlx5_eq_table_get_rmap(pool->dev);
 144         if (rmap)
 145                 irq_cpu_rmap_remove(rmap, irq->map.virq);
 146 #endif
 147
 148         free_cpumask_var(irq->mask);
 149         free_irq(irq->map.virq, &irq->nh);
 150         if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
 151                 pci_msix_free_irq(pool->dev->pdev, irq->map);
 152         kfree(irq);
 153 }
 154
 155 int mlx5_irq_put(struct mlx5_irq *irq)
 156 {
 157         struct mlx5_irq_pool *pool = irq->pool;
 158         int ret = 0;
 159
 160         mutex_lock(&pool->lock);
 161         irq->refcount--;
 162         if (!irq->refcount) {
 163                 irq_release(irq);
 164                 ret = 1;
 165         }
 166         mutex_unlock(&pool->lock);
 167         return ret;
 168 }
 169
 170 int mlx5_irq_read_locked(struct mlx5_irq *irq)
 171 {
 172         lockdep_assert_held(&irq->pool->lock);
 173         return irq->refcount;
 174 }
 175
 176 int mlx5_irq_get_locked(struct mlx5_irq *irq)
 177 {
 178         lockdep_assert_held(&irq->pool->lock);
 179         if (WARN_ON_ONCE(!irq->refcount))
 180                 return 0;
 181         irq->refcount++;
 182         return 1;
 183 }
 184
 185 static int irq_get(struct mlx5_irq *irq)
 186 {
 187         int err;
 188
 189         mutex_lock(&irq->pool->lock);
 190         err = mlx5_irq_get_locked(irq);
 191         mutex_unlock(&irq->pool->lock);
 192         return err;
 193 }
 194
 195 static irqreturn_t irq_int_handler(int irq, void *nh)
 196 {
 197         atomic_notifier_call_chain(nh, 0, NULL);
 198         return IRQ_HANDLED;
 199 }
 200
 201 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 202 {
 203         snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
 204 }
 205
 206 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 207 {
 208         if (!pool->xa_num_irqs.max) {
 209                 /* in case we only have a single irq for the device */
 210                 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
 211                 return;
 212         }
 213
 214         if (!vecidx) {
 215                 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
 216                 return;
 217         }
 218
 219         snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 220 }
 221
 222 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
 223                                 struct irq_affinity_desc *af_desc,
 224                                 struct cpu_rmap **rmap)
 225 {
 226         struct mlx5_core_dev *dev = pool->dev;
 227         char name[MLX5_MAX_IRQ_NAME];
 228         struct mlx5_irq *irq;
 229         int err;
 230
 231         irq = kzalloc(sizeof(*irq), GFP_KERNEL);
 232         if (!irq)
 233                 return ERR_PTR(-ENOMEM);
 234         if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
 235                 /* The vector at index 0 is always statically allocated. If
 236                  * dynamic irq is not supported all vectors are statically
 237                  * allocated. In both cases just get the irq number and set
 238                  * the index.
 239                  */
 240                 irq->map.virq = pci_irq_vector(dev->pdev, i);
 241                 irq->map.index = i;
 242         } else {
 243                 irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
 244                 if (!irq->map.virq) {
 245                         err = irq->map.index;
 246                         goto err_alloc_irq;
 247                 }
 248         }
 249
 250         if (i && rmap && *rmap) {
 251 #ifdef CONFIG_RFS_ACCEL
 252                 err = irq_cpu_rmap_add(*rmap, irq->map.virq);
 253                 if (err)
 254                         goto err_irq_rmap;
 255 #endif
 256         }
 257         if (!mlx5_irq_pool_is_sf_pool(pool))
 258                 irq_set_name(pool, name, i);
 259         else
 260                 irq_sf_set_name(pool, name, i);
 261         ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
 262         snprintf(irq->name, MLX5_MAX_IRQ_NAME,
 263                  "%s@pci:%s", name, pci_name(dev->pdev));
 264         err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
 265                           &irq->nh);
 266         if (err) {
 267                 mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
 268                 goto err_req_irq;
 269         }
 270         if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
 271                 mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
 272                 err = -ENOMEM;
 273                 goto err_cpumask;
 274         }
 275         if (af_desc) {
 276                 cpumask_copy(irq->mask, &af_desc->mask);
 277                 irq_set_affinity_and_hint(irq->map.virq, irq->mask);
 278         }
 279         irq->pool = pool;
 280         irq->refcount = 1;
 281         irq->pool_index = i;
 282         err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
 283         if (err) {
 284                 mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
 285                               irq->pool_index, err);
 286                 goto err_xa;
 287         }
 288         return irq;
 289 err_xa:
 290         if (af_desc)
 291                 irq_update_affinity_hint(irq->map.virq, NULL);
 292         free_cpumask_var(irq->mask);
 293 err_cpumask:
 294         free_irq(irq->map.virq, &irq->nh);
 295 err_req_irq:
 296 #ifdef CONFIG_RFS_ACCEL
 297         if (i && rmap && *rmap) {
 298                 free_irq_cpu_rmap(*rmap);
 299                 *rmap = NULL;
 300         }
 301 err_irq_rmap:
 302 #endif
 303         if (i && pci_msix_can_alloc_dyn(dev->pdev))
 304                 pci_msix_free_irq(dev->pdev, irq->map);
 305 err_alloc_irq:
 306         kfree(irq);
 307         return ERR_PTR(err);
 308 }
 309
 310 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 311 {
 312         int ret;
 313
 314         ret = irq_get(irq);
 315         if (!ret)
 316                 /* Something very bad happens here, we are enabling EQ
 317                  * on non-existing IRQ.
 318                  */
 319                 return -ENOENT;
 320         ret = atomic_notifier_chain_register(&irq->nh, nb);
 321         if (ret)
 322                 mlx5_irq_put(irq);
 323         return ret;
 324 }
 325
 326 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 327 {
 328         int err = 0;
 329
 330         err = atomic_notifier_chain_unregister(&irq->nh, nb);
 331         mlx5_irq_put(irq);
 332         return err;
 333 }
 334
 335 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 336 {
 337         return irq->mask;
 338 }
 339
 340 int mlx5_irq_get_index(struct mlx5_irq *irq)
 341 {
 342         return irq->map.index;
 343 }
 344
 345 /* irq_pool API */
 346
 347 /* requesting an irq from a given pool according to given index */
 348 static struct mlx5_irq *
 349 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
 350                         struct irq_affinity_desc *af_desc,
 351                         struct cpu_rmap **rmap)
 352 {
 353         struct mlx5_irq *irq;
 354
 355         mutex_lock(&pool->lock);
 356         irq = xa_load(&pool->irqs, vecidx);
 357         if (irq) {
 358                 mlx5_irq_get_locked(irq);
 359                 goto unlock;
 360         }
 361         irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
 362 unlock:
 363         mutex_unlock(&pool->lock);
 364         return irq;
 365 }
 366
 367 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
 368 {
 369         return irq_table->sf_ctrl_pool;
 370 }
 371
 372 static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
 373 {
 374         return irq_table->sf_comp_pool;
 375 }
 376
 377 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
 378 {
 379         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 380         struct mlx5_irq_pool *pool = NULL;
 381
 382         if (mlx5_core_is_sf(dev))
 383                 pool = sf_irq_pool_get(irq_table);
 384
 385         /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
 386          * the PF IRQs pool in case the SF pool doesn't exist.
 387          */
 388         return pool ? pool : irq_table->pcif_pool;
 389 }
 390
 391 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
 392 {
 393         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 394         struct mlx5_irq_pool *pool = NULL;
 395
 396         if (mlx5_core_is_sf(dev))
 397                 pool = sf_ctrl_irq_pool_get(irq_table);
 398
 399         /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
 400          * the PF IRQs pool in case the SF pool doesn't exist.
 401          */
 402         return pool ? pool : irq_table->pcif_pool;
 403 }
 404
 405 /**
 406  * mlx5_irqs_release - release one or more IRQs back to the system.
 407  * @irqs: IRQs to be released.
 408  * @nirqs: number of IRQs to be released.
 409  */
 410 static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
 411 {
 412         int i;
 413
 414         for (i = 0; i < nirqs; i++) {
 415                 synchronize_irq(irqs[i]->map.virq);
 416                 mlx5_irq_put(irqs[i]);
 417         }
 418 }
 419
 420 /**
 421  * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 422  * @ctrl_irq: ctrl IRQ to be released.
 423  */
 424 void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
 425 {
 426         mlx5_irqs_release(&ctrl_irq, 1);
 427 }
 428
 429 /**
 430  * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 431  * @dev: mlx5 device that requesting the IRQ.
 432  *
 433  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 434  */
 435 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
 436 {
 437         struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
 438         struct irq_affinity_desc af_desc;
 439         struct mlx5_irq *irq;
 440
 441         cpumask_copy(&af_desc.mask, cpu_online_mask);
 442         af_desc.is_managed = false;
 443         if (!mlx5_irq_pool_is_sf_pool(pool)) {
 444                 /* In case we are allocating a control IRQ from a pci device's pool.
 445                  * This can happen also for a SF if the SFs pool is empty.
 446                  */
 447                 if (!pool->xa_num_irqs.max) {
 448                         cpumask_clear(&af_desc.mask);
 449                         /* In case we only have a single IRQ for PF/VF */
 450                         cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
 451                 }
 452                 /* Allocate the IRQ in index 0. The vector was already allocated */
 453                 irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
 454         } else {
 455                 irq = mlx5_irq_affinity_request(pool, &af_desc);
 456         }
 457
 458         return irq;
 459 }
 460
 461 /**
 462  * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 463  * @dev: mlx5 device that requesting the IRQ.
 464  * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
 465  * provided.
 466  * @af_desc: affinity descriptor for this IRQ.
 467  * @rmap: pointer to reverse map pointer for completion interrupts
 468  *
 469  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 470  */
 471 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 472                                   struct irq_affinity_desc *af_desc,
 473                                   struct cpu_rmap **rmap)
 474 {
 475         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 476         struct mlx5_irq_pool *pool;
 477         struct mlx5_irq *irq;
 478
 479         pool = irq_table->pcif_pool;
 480         irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
 481         if (IS_ERR(irq))
 482                 return irq;
 483         mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
 484                       irq->map.virq, cpumask_pr_args(&af_desc->mask),
 485                       irq->refcount / MLX5_EQ_REFS_PER_IRQ);
 486         return irq;
 487 }
 488
 489 /**
 490  * mlx5_msix_alloc - allocate msix interrupt
 491  * @dev: mlx5 device from which to request
 492  * @handler: interrupt handler
 493  * @affdesc: affinity descriptor
 494  * @name: interrupt name
 495  *
 496  * Returns: struct msi_map with result encoded.
 497  * Note: the caller must make sure to release the irq by calling
 498  *       mlx5_msix_free() if shutdown was initiated.
 499  */
 500 struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
 501                                irqreturn_t (*handler)(int, void *),
 502                                const struct irq_affinity_desc *affdesc,
 503                                const char *name)
 504 {
 505         struct msi_map map;
 506         int err;
 507
 508         if (!dev->pdev) {
 509                 map.virq = 0;
 510                 map.index = -EINVAL;
 511                 return map;
 512         }
 513
 514         map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
 515         if (!map.virq)
 516                 return map;
 517
 518         err = request_irq(map.virq, handler, 0, name, NULL);
 519         if (err) {
 520                 mlx5_core_warn(dev, "err %d\n", err);
 521                 pci_msix_free_irq(dev->pdev, map);
 522                 map.virq = 0;
 523                 map.index = -ENOMEM;
 524         }
 525         return map;
 526 }
 527 EXPORT_SYMBOL(mlx5_msix_alloc);
 528
 529 /**
 530  * mlx5_msix_free - free a previously allocated msix interrupt
 531  * @dev: mlx5 device associated with interrupt
 532  * @map: map previously returned by mlx5_msix_alloc()
 533  */
 534 void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
 535 {
 536         free_irq(map.virq, NULL);
 537         pci_msix_free_irq(dev->pdev, map);
 538 }
 539 EXPORT_SYMBOL(mlx5_msix_free);
 540
 541 /**
 542  * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
 543  * @irqs: IRQs to be released.
 544  * @nirqs: number of IRQs to be released.
 545  */
 546 void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
 547 {
 548         mlx5_irqs_release(irqs, nirqs);
 549 }
 550
 551 /**
 552  * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
 553  * @dev: mlx5 device that is requesting the IRQs.
 554  * @cpus: CPUs array for binding the IRQs
 555  * @nirqs: number of IRQs to request.
 556  * @irqs: an output array of IRQs pointers.
 557  * @rmap: pointer to reverse map pointer for completion interrupts
 558  *
 559  * Each IRQ is bound to at most 1 CPU.
 560  * This function is requests nirqs IRQs, starting from @vecidx.
 561  *
 562  * This function returns the number of IRQs requested, (which might be smaller than
 563  * @nirqs), if successful, or a negative error code in case of an error.
 564  */
 565 int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
 566                               struct mlx5_irq **irqs, struct cpu_rmap **rmap)
 567 {
 568         struct irq_affinity_desc af_desc;
 569         struct mlx5_irq *irq;
 570         int i;
 571
 572         af_desc.is_managed = false;
 573         for (i = 0; i < nirqs; i++) {
 574                 cpumask_clear(&af_desc.mask);
 575                 cpumask_set_cpu(cpus[i], &af_desc.mask);
 576                 irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
 577                 if (IS_ERR(irq))
 578                         break;
 579                 irqs[i] = irq;
 580         }
 581
 582         return i ? i : PTR_ERR(irq);
 583 }
 584
 585 static struct mlx5_irq_pool *
 586 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
 587                u32 min_threshold, u32 max_threshold)
 588 {
 589         struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
 590
 591         if (!pool)
 592                 return ERR_PTR(-ENOMEM);
 593         pool->dev = dev;
 594         mutex_init(&pool->lock);
 595         xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
 596         pool->xa_num_irqs.min = start;
 597         pool->xa_num_irqs.max = start + size - 1;
 598         if (name)
 599                 snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
 600                          "%s", name);
 601         pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
 602         pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
 603         mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
 604                       name, size, start);
 605         return pool;
 606 }
 607
 608 static void irq_pool_free(struct mlx5_irq_pool *pool)
 609 {
 610         struct mlx5_irq *irq;
 611         unsigned long index;
 612
 613         /* There are cases in which we are destrying the irq_table before
 614          * freeing all the IRQs, fast teardown for example. Hence, free the irqs
 615          * which might not have been freed.
 616          */
 617         xa_for_each(&pool->irqs, index, irq)
 618                 irq_release(irq);
 619         xa_destroy(&pool->irqs);
 620         mutex_destroy(&pool->lock);
 621         kfree(pool->irqs_per_cpu);
 622         kvfree(pool);
 623 }
 624
 625 static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
 626 {
 627         struct mlx5_irq_table *table = dev->priv.irq_table;
 628         int num_sf_ctrl_by_msix;
 629         int num_sf_ctrl_by_sfs;
 630         int num_sf_ctrl;
 631         int err;
 632
 633         /* init pcif_pool */
 634         table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
 635                                           MLX5_EQ_SHARE_IRQ_MIN_COMP,
 636                                           MLX5_EQ_SHARE_IRQ_MAX_COMP);
 637         if (IS_ERR(table->pcif_pool))
 638                 return PTR_ERR(table->pcif_pool);
 639         if (!mlx5_sf_max_functions(dev))
 640                 return 0;
 641         if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
 642                 mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
 643                 return 0;
 644         }
 645
 646         /* init sf_ctrl_pool */
 647         num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
 648         num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
 649                                           MLX5_SFS_PER_CTRL_IRQ);
 650         num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
 651         num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
 652         table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
 653                                              "mlx5_sf_ctrl",
 654                                              MLX5_EQ_SHARE_IRQ_MIN_CTRL,
 655                                              MLX5_EQ_SHARE_IRQ_MAX_CTRL);
 656         if (IS_ERR(table->sf_ctrl_pool)) {
 657                 err = PTR_ERR(table->sf_ctrl_pool);
 658                 goto err_pf;
 659         }
 660         /* init sf_comp_pool */
 661         table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
 662                                              sf_vec - num_sf_ctrl, "mlx5_sf_comp",
 663                                              MLX5_EQ_SHARE_IRQ_MIN_COMP,
 664                                              MLX5_EQ_SHARE_IRQ_MAX_COMP);
 665         if (IS_ERR(table->sf_comp_pool)) {
 666                 err = PTR_ERR(table->sf_comp_pool);
 667                 goto err_sf_ctrl;
 668         }
 669
 670         table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
 671         if (!table->sf_comp_pool->irqs_per_cpu) {
 672                 err = -ENOMEM;
 673                 goto err_irqs_per_cpu;
 674         }
 675
 676         return 0;
 677
 678 err_irqs_per_cpu:
 679         irq_pool_free(table->sf_comp_pool);
 680 err_sf_ctrl:
 681         irq_pool_free(table->sf_ctrl_pool);
 682 err_pf:
 683         irq_pool_free(table->pcif_pool);
 684         return err;
 685 }
 686
 687 static void irq_pools_destroy(struct mlx5_irq_table *table)
 688 {
 689         if (table->sf_ctrl_pool) {
 690                 irq_pool_free(table->sf_comp_pool);
 691                 irq_pool_free(table->sf_ctrl_pool);
 692         }
 693         irq_pool_free(table->pcif_pool);
 694 }
 695
 696 static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
 697 {
 698         struct mlx5_irq *irq;
 699         unsigned long index;
 700
 701         xa_for_each(&pool->irqs, index, irq)
 702                 free_irq(irq->map.virq, &irq->nh);
 703 }
 704
 705 static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
 706 {
 707         if (table->sf_ctrl_pool) {
 708                 mlx5_irq_pool_free_irqs(table->sf_comp_pool);
 709                 mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
 710         }
 711         mlx5_irq_pool_free_irqs(table->pcif_pool);
 712 }
 713
 714 /* irq_table API */
 715
 716 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
 717 {
 718         struct mlx5_irq_table *irq_table;
 719
 720         if (mlx5_core_is_sf(dev))
 721                 return 0;
 722
 723         irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
 724                                   dev->priv.numa_node);
 725         if (!irq_table)
 726                 return -ENOMEM;
 727
 728         dev->priv.irq_table = irq_table;
 729         return 0;
 730 }
 731
 732 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
 733 {
 734         if (mlx5_core_is_sf(dev))
 735                 return;
 736
 737         kvfree(dev->priv.irq_table);
 738 }
 739
 740 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
 741 {
 742         if (!table->pcif_pool->xa_num_irqs.max)
 743                 return 1;
 744         return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
 745 }
 746
 747 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 748 {
 749         int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
 750                       MLX5_CAP_GEN(dev, max_num_eqs) :
 751                       1 << MLX5_CAP_GEN(dev, log_max_eq);
 752         int total_vec;
 753         int pcif_vec;
 754         int req_vec;
 755         int err;
 756         int n;
 757
 758         if (mlx5_core_is_sf(dev))
 759                 return 0;
 760
 761         pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
 762         pcif_vec = min_t(int, pcif_vec, num_eqs);
 763
 764         total_vec = pcif_vec;
 765         if (mlx5_sf_max_functions(dev))
 766                 total_vec += MLX5_IRQ_CTRL_SF_MAX +
 767                         MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
 768         total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
 769         pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
 770
 771         req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
 772         n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
 773         if (n < 0)
 774                 return n;
 775
 776         err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
 777         if (err)
 778                 pci_free_irq_vectors(dev->pdev);
 779
 780         return err;
 781 }
 782
 783 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 784 {
 785         struct mlx5_irq_table *table = dev->priv.irq_table;
 786
 787         if (mlx5_core_is_sf(dev))
 788                 return;
 789
 790         /* There are cases where IRQs still will be in used when we reaching
 791          * to here. Hence, making sure all the irqs are released.
 792          */
 793         irq_pools_destroy(table);
 794         pci_free_irq_vectors(dev->pdev);
 795 }
 796
 797 void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
 798 {
 799         struct mlx5_irq_table *table = dev->priv.irq_table;
 800
 801         if (mlx5_core_is_sf(dev))
 802                 return;
 803
 804         mlx5_irq_pools_free_irqs(table);
 805         pci_free_irq_vectors(dev->pdev);
 806 }
 807
 808 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
 809 {
 810         if (table->sf_comp_pool)
 811                 return min_t(int, num_online_cpus(),
 812                              table->sf_comp_pool->xa_num_irqs.max -
 813                              table->sf_comp_pool->xa_num_irqs.min + 1);
 814         else
 815                 return mlx5_irq_table_get_num_comp(table);
 816 }
 817
 818 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
 819 {
 820 #ifdef CONFIG_MLX5_SF
 821         if (mlx5_core_is_sf(dev))
 822                 return dev->priv.parent_mdev->priv.irq_table;
 823 #endif
 824         return dev->priv.irq_table;
 825 }