From 627122280c878cf5d3cda2d2c5a0a8f6a7e35cb7 Mon Sep 17 00:00:00 2001
From: Michael Guralnik <michaelgur@nvidia.com>
Date: Thu, 26 Jan 2023 00:28:07 +0200
Subject: [PATCH] RDMA/mlx5: Add work to remove temporary entries from the
 cache

The non-cache mkeys are stored in the cache only to shorten restarting
application time. Don't store them longer than needed.

Configure cache entries that store non-cache MRs as temporary entries.
If 30 seconds pass and no user has reclaimed the temporarily cached
mkeys, asynchronous work destroys the mkey entries.
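For intuition, the lifecycle this adds boils down to the minimal
userspace sketch below. It is not driver code: the pthread reaper
stands in for the remove_ent_dwork delayed work, and every name in it
is invented for illustration (build with cc -pthread):

  /* Sketch of the temporary-entry lifecycle. A reaper fires once,
   * 30 seconds after a temporary entry is created, and destroys it
   * unless a user reclaimed it in the meantime. */
  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>
  #include <unistd.h>

  struct cache_ent {
  	bool is_tmp;            /* entry holds non-cache mkeys */
  	bool reclaimed;         /* a user took the mkeys back in time */
  	pthread_mutex_t lock;
  };

  /* Plays the role of remove_ent_work_func() */
  static void *reaper(void *arg)
  {
  	struct cache_ent *ent = arg;

  	sleep(30);
  	pthread_mutex_lock(&ent->lock);
  	if (ent->is_tmp && !ent->reclaimed)
  		printf("destroying unreclaimed temporary entry\n");
  	pthread_mutex_unlock(&ent->lock);
  	return NULL;
  }

  int main(void)
  {
  	struct cache_ent ent = { .is_tmp = true, .reclaimed = false,
  				 .lock = PTHREAD_MUTEX_INITIALIZER };
  	pthread_t t;

  	/* mod_delayed_work(..., msecs_to_jiffies(30 * 1000)) analogue */
  	pthread_create(&t, NULL, reaper, &ent);
  	/* an application restarting within 30s would set ent.reclaimed */
  	pthread_join(t, NULL);
  	return 0;
  }

An application that restarts within the 30-second window simply finds
its mkeys still cached; only entries nobody reclaimed are destroyed.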
Link: https://lore.kernel.org/r/20230125222807.6921-7-michaelgur@nvidia.com
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  9 ++--
 drivers/infiniband/hw/mlx5/mr.c      | 94 ++++++++++++++++++++++++++++--------
 drivers/infiniband/hw/mlx5/odp.c     |  2 +-
 3 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c9c80af..e8b0a92 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -750,6 +750,7 @@ struct mlx5_cache_ent {
 	struct rb_node node;
 	struct mlx5r_cache_rb_key rb_key;
 
+	u8 is_tmp:1;
 	u8 disabled:1;
 	u8 fill_to_high_water:1;
 
@@ -783,6 +784,7 @@ struct mlx5_mkey_cache {
 	struct mutex rb_lock;
 	struct dentry *fs_root;
 	unsigned long last_add;
+	struct delayed_work remove_ent_dwork;
 };
 
 struct mlx5_ib_port_resources {
@@ -1326,9 +1328,10 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
-struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-					      struct mlx5r_cache_rb_key rb_key,
-					      bool persistent_entry);
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+			      struct mlx5r_cache_rb_key rb_key,
+			      bool persistent_entry);
 
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 				       int access_flags, int access_mode,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d4dff1b..c396b94 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -140,19 +140,16 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
 	mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
 }
 
-
-static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
-		     void *to_store)
+static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings,
+			    void *to_store)
 {
 	XA_STATE(xas, &ent->mkeys, 0);
 	void *curr;
 
-	xa_lock_irq(&ent->mkeys);
 	if (limit_pendings &&
-	    (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
-		xa_unlock_irq(&ent->mkeys);
+	    (ent->reserved - ent->stored) > MAX_PENDING_REG_MR)
 		return -EAGAIN;
-	}
+
 	while (1) {
 		/*
 		 * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
@@ -191,6 +188,7 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
 			break;
 		xa_lock_irq(&ent->mkeys);
 	}
+	xa_lock_irq(&ent->mkeys);
 	if (xas_error(&xas))
 		return xas_error(&xas);
 	if (WARN_ON(curr))
@@ -198,6 +196,17 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
 	return 0;
 }
 
+static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
+		     void *to_store)
+{
+	int ret;
+
+	xa_lock_irq(&ent->mkeys);
+	ret = push_mkey_locked(ent, limit_pendings, to_store);
+	xa_unlock_irq(&ent->mkeys);
+	return ret;
+}
+
 static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
 {
 	void *old;
@@ -545,7 +554,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
 {
 	lockdep_assert_held(&ent->mkeys.xa_lock);
 
-	if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
+	if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
 		return;
 	if (ent->stored < ent->limit) {
 		ent->fill_to_high_water = true;
@@ -675,7 +684,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
 	struct mlx5_cache_ent *cur;
 	int cmp;
 
-	mutex_lock(&cache->rb_lock);
 	/* Figure out where to put new node */
 	while (*new) {
 		cur = rb_entry(*new, struct mlx5_cache_ent, node);
@@ -695,7 +703,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
 
 	rb_link_node(&ent->node, parent, new);
 	rb_insert_color(&ent->node, &cache->rb_root);
-	mutex_unlock(&cache->rb_lock);
 	return 0;
 }
 
@@ -867,9 +874,10 @@ static void delay_time_func(struct timer_list *t)
 	WRITE_ONCE(dev->fill_delay, 0);
 }
 
-struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-					      struct mlx5r_cache_rb_key rb_key,
-					      bool persistent_entry)
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+			      struct mlx5r_cache_rb_key rb_key,
+			      bool persistent_entry)
 {
 	struct mlx5_cache_ent *ent;
 	int order;
@@ -882,6 +890,7 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
 	xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
 	ent->rb_key = rb_key;
 	ent->dev = dev;
+	ent->is_tmp = !persistent_entry;
 
 	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 
@@ -905,11 +914,44 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
 		ent->limit = 0;
 
 		mlx5_mkey_cache_debugfs_add_ent(dev, ent);
+	} else {
+		mod_delayed_work(ent->dev->cache.wq,
+				 &ent->dev->cache.remove_ent_dwork,
+				 msecs_to_jiffies(30 * 1000));
 	}
 
 	return ent;
 }
 
+static void remove_ent_work_func(struct work_struct *work)
+{
+	struct mlx5_mkey_cache *cache;
+	struct mlx5_cache_ent *ent;
+	struct rb_node *cur;
+
+	cache = container_of(work, struct mlx5_mkey_cache,
+			     remove_ent_dwork.work);
+	mutex_lock(&cache->rb_lock);
+	cur = rb_last(&cache->rb_root);
+	while (cur) {
+		ent = rb_entry(cur, struct mlx5_cache_ent, node);
+		cur = rb_prev(cur);
+		mutex_unlock(&cache->rb_lock);
+
+		xa_lock_irq(&ent->mkeys);
+		if (!ent->is_tmp) {
+			xa_unlock_irq(&ent->mkeys);
+			mutex_lock(&cache->rb_lock);
+			continue;
+		}
+		xa_unlock_irq(&ent->mkeys);
+
+		clean_keys(ent->dev, ent);
+		mutex_lock(&cache->rb_lock);
+	}
+	mutex_unlock(&cache->rb_lock);
+}
+
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mkey_cache *cache = &dev->cache;
@@ -925,6 +967,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 	mutex_init(&dev->slow_path_mutex);
 	mutex_init(&dev->cache.rb_lock);
 	dev->cache.rb_root = RB_ROOT;
+	INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func);
 	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
 	if (!cache->wq) {
 		mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -934,9 +977,10 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
 	timer_setup(&dev->delay_timer, delay_time_func, 0);
 	mlx5_mkey_cache_debugfs_init(dev);
+	mutex_lock(&cache->rb_lock);
 	for (i = 0; i <= mkey_cache_max_order(dev); i++) {
 		rb_key.ndescs = 1 << (i + 2);
-		ent = mlx5r_cache_create_ent(dev, rb_key, true);
+		ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
 		if (IS_ERR(ent)) {
 			ret = PTR_ERR(ent);
 			goto err;
@@ -947,6 +991,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 	if (ret)
 		goto err;
 
+	mutex_unlock(&cache->rb_lock);
 	for (node = rb_first(root); node; node = rb_next(node)) {
 		ent = rb_entry(node, struct mlx5_cache_ent, node);
 		xa_lock_irq(&ent->mkeys);
@@ -957,6 +1002,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 	return 0;
 
 err:
+	mutex_unlock(&cache->rb_lock);
 	mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
 	return ret;
 }
@@ -970,6 +1016,7 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
 	if (!dev->cache.wq)
 		return 0;
 
+	cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
 	mutex_lock(&dev->cache.rb_lock);
 	for (node = rb_first(root); node; node = rb_next(node)) {
 		ent = rb_entry(node, struct mlx5_cache_ent, node);
@@ -1751,33 +1798,42 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
 {
 	struct mlx5_mkey_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
+	int ret;
 
 	if (mr->mmkey.cache_ent) {
 		xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
 		mr->mmkey.cache_ent->in_use--;
-		xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
 		goto end;
 	}
 
 	mutex_lock(&cache->rb_lock);
 	ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
-	mutex_unlock(&cache->rb_lock);
 	if (ent) {
 		if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
+			if (ent->disabled) {
+				mutex_unlock(&cache->rb_lock);
+				return -EOPNOTSUPP;
+			}
 			mr->mmkey.cache_ent = ent;
+			xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+			mutex_unlock(&cache->rb_lock);
 			goto end;
 		}
 	}
 
-	ent = mlx5r_cache_create_ent(dev, mr->mmkey.rb_key, false);
+	ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false);
+	mutex_unlock(&cache->rb_lock);
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
 
 	mr->mmkey.cache_ent = ent;
+	xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
 
 end:
-	return push_mkey(mr->mmkey.cache_ent, false,
-			 xa_mk_value(mr->mmkey.key));
+	ret = push_mkey_locked(mr->mmkey.cache_ent, false,
+			       xa_mk_value(mr->mmkey.key));
+	xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
+	return ret;
 }
 
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index c51d6c9..6f44709 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1598,7 +1598,7 @@ int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
 	if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
 		return 0;
 
-	ent = mlx5r_cache_create_ent(dev, rb_key, true);
+	ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
 
-- 
2.7.4