Merge tag 'v5.18' into rdma.git for-next
author     Jason Gunthorpe <jgg@nvidia.com>
           Tue, 24 May 2022 15:40:28 +0000 (12:40 -0300)
committer  Jason Gunthorpe <jgg@nvidia.com>
           Tue, 24 May 2022 15:40:28 +0000 (12:40 -0300)
The following patches have dependencies.

Resolve the merge conflict in
drivers/net/ethernet/mellanox/mlx5/core/main.c by keeping the new names
for the flow steering (fs_core) functions, following linux-next:

https://lore.kernel.org/r/20220519113529.226bc3e2@canb.auug.org.au/

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
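
As context for the resolution, below is a minimal standalone sketch (not the kernel source; the wrapper names init_once/load/unload/main are invented for illustration) of the lifecycle split that the kept mlx5_fs_core_* names imply: flow steering software state is allocated once and freed once, while the steering namespaces are initialized and torn down on every load/unload, replacing the single mlx5_init_fs()/mlx5_cleanup_fs() pair.

#include <stdio.h>

struct mlx5_core_dev { int loaded; };

/* Stubs standing in for the renamed flow-steering entry points kept by
 * this resolution; the real implementations live in fs_core.c. */
static int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) { (void)dev; return 0; }
static void mlx5_fs_core_free(struct mlx5_core_dev *dev) { (void)dev; }
static int mlx5_fs_core_init(struct mlx5_core_dev *dev) { (void)dev; return 0; }
static void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) { (void)dev; }

/* Once-only software state, as done in mlx5_init_once()/mlx5_cleanup_once(). */
static int init_once(struct mlx5_core_dev *dev)
{
	return mlx5_fs_core_alloc(dev);
}

static void cleanup_once(struct mlx5_core_dev *dev)
{
	mlx5_fs_core_free(dev);
}

/* Per-load setup/teardown, as done in mlx5_load()/mlx5_unload(). */
static int load(struct mlx5_core_dev *dev)
{
	int err;

	err = mlx5_fs_core_init(dev);
	if (err) {
		fprintf(stderr, "Failed to init flow steering\n");
		return err;
	}
	dev->loaded = 1;
	return 0;
}

static void unload(struct mlx5_core_dev *dev)
{
	mlx5_fs_core_cleanup(dev);
	dev->loaded = 0;
}

int main(void)
{
	struct mlx5_core_dev dev = { 0 };

	if (init_once(&dev))
		return 1;
	if (load(&dev)) {
		cleanup_once(&dev);
		return 1;
	}
	unload(&dev);
	cleanup_once(&dev);
	return 0;
}

The same ordering appears in the main.c hunks at the end of this diff: mlx5_fs_core_alloc() in mlx5_init_once(), mlx5_fs_core_init() in mlx5_load(), with cleanup and free on the matching error and teardown paths.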
drivers/infiniband/hw/irdma/utils.c
drivers/infiniband/hw/irdma/verbs.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/main.c

diff --cc drivers/infiniband/hw/irdma/utils.c
@@@ -258,18 -258,16 +258,16 @@@ int irdma_net_event(struct notifier_blo
        u32 local_ipaddr[4] = {};
        bool ipv4 = true;
  
-       real_dev = rdma_vlan_dev_real_dev(netdev);
-       if (!real_dev)
-               real_dev = netdev;
-       ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
-       if (!ibdev)
-               return NOTIFY_DONE;
-       iwdev = to_iwdev(ibdev);
        switch (event) {
        case NETEVENT_NEIGH_UPDATE:
+               real_dev = rdma_vlan_dev_real_dev(netdev);
+               if (!real_dev)
+                       real_dev = netdev;
+               ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+               if (!ibdev)
+                       return NOTIFY_DONE;
+               iwdev = to_iwdev(ibdev);
                p = (__be32 *)neigh->primary_key;
                if (neigh->tbl->family == AF_INET6) {
                        ipv4 = false;
                        irdma_manage_arp_cache(iwdev->rf, neigh->ha,
                                               local_ipaddr, ipv4,
                                               IRDMA_ARP_DELETE);
+               ib_device_put(ibdev);
                break;
        default:
                break;
        }
  
-       ib_device_put(ibdev);
        return NOTIFY_DONE;
  }
  
@@@ -2498,150 -2495,3 +2495,150 @@@ bool irdma_cq_empty(struct irdma_cq *iw
  
        return polarity != ukcq->polarity;
  }
 +
 +void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
 +{
 +      struct irdma_cmpl_gen *cmpl_node;
 +      struct list_head *tmp_node, *list_node;
 +
 +      list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
 +              cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
 +              list_del(&cmpl_node->list);
 +              kfree(cmpl_node);
 +      }
 +}
 +
 +int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
 +{
 +      struct irdma_cmpl_gen *cmpl;
 +
 +      if (list_empty(&iwcq->cmpl_generated))
 +              return -ENOENT;
 +      cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
 +      list_del(&cmpl->list);
 +      memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
 +      kfree(cmpl);
 +
 +      ibdev_dbg(iwcq->ibcq.device,
 +                "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
 +                __func__, cq_poll_info->qp_id, cq_poll_info->op_type,
 +                cq_poll_info->wr_id);
 +
 +      return 0;
 +}
 +
 +/**
 + * irdma_set_cpi_common_values - fill in values for polling info struct
 + * @cpi: resulting structure of cq_poll_info type
 + * @qp: QPair
 + * @qp_num: id of the QP
 + */
 +static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
 +                                      struct irdma_qp_uk *qp, u32 qp_num)
 +{
 +      cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
 +      cpi->error = true;
 +      cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
 +      cpi->minor_err = FLUSH_GENERAL_ERR;
 +      cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
 +      cpi->qp_id = qp_num;
 +}
 +
 +static inline void irdma_comp_handler(struct irdma_cq *cq)
 +{
 +      if (!cq->ibcq.comp_handler)
 +              return;
 +      if (atomic_cmpxchg(&cq->armed, 1, 0))
 +              cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
 +}
 +
 +void irdma_generate_flush_completions(struct irdma_qp *iwqp)
 +{
 +      struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
 +      struct irdma_ring *sq_ring = &qp->sq_ring;
 +      struct irdma_ring *rq_ring = &qp->rq_ring;
 +      struct irdma_cmpl_gen *cmpl;
 +      __le64 *sw_wqe;
 +      u64 wqe_qword;
 +      u32 wqe_idx;
 +      bool compl_generated = false;
 +      unsigned long flags1;
 +
 +      spin_lock_irqsave(&iwqp->iwscq->lock, flags1);
 +      if (irdma_cq_empty(iwqp->iwscq)) {
 +              unsigned long flags2;
 +
 +              spin_lock_irqsave(&iwqp->lock, flags2);
 +              while (IRDMA_RING_MORE_WORK(*sq_ring)) {
 +                      cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
 +                      if (!cmpl) {
 +                              spin_unlock_irqrestore(&iwqp->lock, flags2);
 +                              spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
 +                              return;
 +                      }
 +
 +                      wqe_idx = sq_ring->tail;
 +                      irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
 +
 +                      cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
 +                      sw_wqe = qp->sq_base[wqe_idx].elem;
 +                      get_64bit_val(sw_wqe, 24, &wqe_qword);
 +                      cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
 +                      /* remove the SQ WR by moving SQ tail*/
 +                      IRDMA_RING_SET_TAIL(*sq_ring,
 +                              sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
 +
 +                      ibdev_dbg(iwqp->iwscq->ibcq.device,
 +                                "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
 +                                __func__, cmpl->cpi.wr_id, qp->qp_id);
 +                      list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated);
 +                      compl_generated = true;
 +              }
 +              spin_unlock_irqrestore(&iwqp->lock, flags2);
 +              spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
 +              if (compl_generated)
 +                      irdma_comp_handler(iwqp->iwrcq);
 +      } else {
 +              spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
 +              mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
 +                               msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
 +      }
 +
 +      spin_lock_irqsave(&iwqp->iwrcq->lock, flags1);
 +      if (irdma_cq_empty(iwqp->iwrcq)) {
 +              unsigned long flags2;
 +
 +              spin_lock_irqsave(&iwqp->lock, flags2);
 +              while (IRDMA_RING_MORE_WORK(*rq_ring)) {
 +                      cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
 +                      if (!cmpl) {
 +                              spin_unlock_irqrestore(&iwqp->lock, flags2);
 +                              spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
 +                              return;
 +                      }
 +
 +                      wqe_idx = rq_ring->tail;
 +                      irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
 +
 +                      cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
 +                      cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
 +                      /* remove the RQ WR by moving RQ tail */
 +                      IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
 +                      ibdev_dbg(iwqp->iwrcq->ibcq.device,
 +                                "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
 +                                __func__, cmpl->cpi.wr_id, qp->qp_id,
 +                                wqe_idx);
 +                      list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated);
 +
 +                      compl_generated = true;
 +              }
 +              spin_unlock_irqrestore(&iwqp->lock, flags2);
 +              spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
 +              if (compl_generated)
 +                      irdma_comp_handler(iwqp->iwrcq);
 +      } else {
 +              spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
 +              mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
 +                               msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
 +      }
 +}
diff --cc drivers/infiniband/hw/irdma/verbs.c
@@@ -25,9 -25,7 +25,9 @@@ static int irdma_query_device(struct ib
                            iwdev->netdev->dev_addr);
        props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 |
                        irdma_fw_minor_ver(&rf->sc_dev);
 -      props->device_cap_flags = iwdev->device_cap_flags;
 +      props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
 +                                IB_DEVICE_MEM_MGT_EXTENSIONS;
 +      props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
        props->vendor_id = pcidev->vendor;
        props->vendor_part_id = pcidev->device;
  
@@@ -535,9 -533,6 +535,9 @@@ static int irdma_destroy_qp(struct ib_q
        if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
                irdma_modify_qp_to_err(&iwqp->sc_qp);
  
 +      if (!iwqp->user_mode)
 +              cancel_delayed_work_sync(&iwqp->dwork_flush);
 +
        irdma_qp_rem_ref(&iwqp->ibqp);
        wait_for_completion(&iwqp->free_qp);
        irdma_free_lsmm_rsrc(iwqp);
@@@ -793,14 -788,6 +793,14 @@@ static int irdma_validate_qp_attrs(stru
        return 0;
  }
  
 +static void irdma_flush_worker(struct work_struct *work)
 +{
 +      struct delayed_work *dwork = to_delayed_work(work);
 +      struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
 +
 +      irdma_generate_flush_completions(iwqp);
 +}
 +
  /**
   * irdma_create_qp - create qp
   * @ibqp: ptr of qp
@@@ -920,7 -907,6 +920,7 @@@ static int irdma_create_qp(struct ib_q
                init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
                irdma_setup_virt_qp(iwdev, iwqp, &init_info);
        } else {
 +              INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
                init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
                err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
        }
@@@ -1412,11 -1398,11 +1412,11 @@@ int irdma_modify_qp_roce(struct ib_qp *
                        }
                        if (iwqp->ibqp_state > IB_QPS_RTS &&
                            !iwqp->flush_issued) {
 -                              iwqp->flush_issued = 1;
                                spin_unlock_irqrestore(&iwqp->lock, flags);
                                irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
                                                       IRDMA_FLUSH_RQ |
                                                       IRDMA_FLUSH_WAIT);
 +                              iwqp->flush_issued = 1;
                        } else {
                                spin_unlock_irqrestore(&iwqp->lock, flags);
                        }
@@@ -1632,13 -1618,13 +1632,13 @@@ int irdma_modify_qp(struct ib_qp *ibqp
  
        if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
                if (dont_wait) {
-                       if (iwqp->cm_id && iwqp->hw_tcp_state) {
+                       if (iwqp->hw_tcp_state) {
                                spin_lock_irqsave(&iwqp->lock, flags);
                                iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
                                iwqp->last_aeq = IRDMA_AE_RESET_SENT;
                                spin_unlock_irqrestore(&iwqp->lock, flags);
-                               irdma_cm_disconn(iwqp);
                        }
+                       irdma_cm_disconn(iwqp);
                } else {
                        int close_timer_started;
  
@@@ -1769,8 -1755,6 +1769,8 @@@ static int irdma_destroy_cq(struct ib_c
        unsigned long flags;
  
        spin_lock_irqsave(&iwcq->lock, flags);
 +      if (!list_empty(&iwcq->cmpl_generated))
 +              irdma_remove_cmpls_list(iwcq);
        if (!list_empty(&iwcq->resize_list))
                irdma_process_resize_list(iwcq, iwdev, NULL);
        spin_unlock_irqrestore(&iwcq->lock, flags);
@@@ -1975,7 -1959,6 +1975,7 @@@ static int irdma_create_cq(struct ib_c
        cq->back_cq = iwcq;
        spin_lock_init(&iwcq->lock);
        INIT_LIST_HEAD(&iwcq->resize_list);
 +      INIT_LIST_HEAD(&iwcq->cmpl_generated);
        info.dev = dev;
        ukinfo->cq_size = max(entries, 4);
        ukinfo->cq_id = cq_num;
@@@ -3061,12 -3044,15 +3061,12 @@@ static int irdma_post_send(struct ib_q
        unsigned long flags;
        bool inv_stag;
        struct irdma_ah *ah;
 -      bool reflush = false;
  
        iwqp = to_iwqp(ibqp);
        ukqp = &iwqp->sc_qp.qp_uk;
        dev = &iwqp->iwdev->rf->sc_dev;
  
        spin_lock_irqsave(&iwqp->lock, flags);
 -      if (iwqp->flush_issued && ukqp->sq_flush_complete)
 -              reflush = true;
        while (ib_wr) {
                memset(&info, 0, sizeof(info));
                inv_stag = false;
                ib_wr = ib_wr->next;
        }
  
 -      if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) {
 -              irdma_uk_qp_post_wr(ukqp);
 +      if (!iwqp->flush_issued) {
 +              if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
 +                      irdma_uk_qp_post_wr(ukqp);
                spin_unlock_irqrestore(&iwqp->lock, flags);
 -      } else if (reflush) {
 -              ukqp->sq_flush_complete = false;
 -              spin_unlock_irqrestore(&iwqp->lock, flags);
 -              irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH);
        } else {
                spin_unlock_irqrestore(&iwqp->lock, flags);
 +              mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
 +                               msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
        }
        if (err)
                *bad_wr = ib_wr;
@@@ -3246,11 -3233,14 +3246,11 @@@ static int irdma_post_recv(struct ib_q
        struct irdma_post_rq_info post_recv = {};
        unsigned long flags;
        int err = 0;
 -      bool reflush = false;
  
        iwqp = to_iwqp(ibqp);
        ukqp = &iwqp->sc_qp.qp_uk;
  
        spin_lock_irqsave(&iwqp->lock, flags);
 -      if (iwqp->flush_issued && ukqp->rq_flush_complete)
 -              reflush = true;
        while (ib_wr) {
                post_recv.num_sges = ib_wr->num_sge;
                post_recv.wr_id = ib_wr->wr_id;
        }
  
  out:
 -      if (reflush) {
 -              ukqp->rq_flush_complete = false;
 -              spin_unlock_irqrestore(&iwqp->lock, flags);
 -              irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH);
 -      } else {
 -              spin_unlock_irqrestore(&iwqp->lock, flags);
 -      }
 +      spin_unlock_irqrestore(&iwqp->lock, flags);
 +      if (iwqp->flush_issued)
 +              mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
 +                               msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
  
        if (err)
                *bad_wr = ib_wr;
@@@ -3481,11 -3474,6 +3481,11 @@@ static int __irdma_poll_cq(struct irdma
        /* check the current CQ for new cqes */
        while (npolled < num_entries) {
                ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
 +              if (ret == -ENOENT) {
 +                      ret = irdma_generated_cmpls(iwcq, cur_cqe);
 +                      if (!ret)
 +                              irdma_process_cqe(entry + npolled, cur_cqe);
 +              }
                if (!ret) {
                        ++npolled;
                        cq_new_cqe = true;
@@@ -3567,13 -3555,13 +3567,13 @@@ static int irdma_req_notify_cq(struct i
        if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
                promo_event = true;
  
 -      if (!iwcq->armed || promo_event) {
 -              iwcq->armed = true;
 +      if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
                iwcq->last_notify = cq_notify;
                irdma_uk_cq_request_notification(ukcq, cq_notify);
        }
  
 -      if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq))
 +      if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
 +          (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
                ret = 1;
        spin_unlock_irqrestore(&iwcq->lock, flags);
  
diff --cc drivers/infiniband/hw/mlx5/mr.c
  #include <rdma/ib_verbs.h>
  #include "dm.h"
  #include "mlx5_ib.h"
 -
 -/*
 - * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 - * work on kernel modules memory
 - */
 -void *xlt_emergency_page;
 -static DEFINE_MUTEX(xlt_emergency_page_mutex);
 +#include "umr.h"
  
  enum {
        MAX_PENDING_REG_MR = 8,
@@@ -122,6 -128,11 +122,6 @@@ mlx5_ib_create_mkey_cb(struct mlx5_ib_d
  static int mr_cache_max_order(struct mlx5_ib_dev *dev);
  static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
  
 -static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
 -{
 -      return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
 -}
 -
  static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  {
        WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
@@@ -563,8 -574,10 +563,10 @@@ static void __cache_work_func(struct ml
                spin_lock_irq(&ent->lock);
                if (ent->disabled)
                        goto out;
-               if (need_delay)
+               if (need_delay) {
                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+                       goto out;
+               }
                remove_cache_mr_locked(ent);
                queue_adjust_cache_locked(ent);
        }
@@@ -587,7 -600,7 +589,7 @@@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(
        struct mlx5_ib_mr *mr;
  
        /* Matches access in alloc_cache_mr() */
 -      if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
 +      if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
                return ERR_PTR(-EOPNOTSUPP);
  
        spin_lock_irq(&ent->lock);
@@@ -614,6 -627,7 +616,7 @@@ static void mlx5_mr_cache_free(struct m
  {
        struct mlx5_cache_ent *ent = mr->cache_ent;
  
+       WRITE_ONCE(dev->cache.last_add, jiffies);
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->available_mrs++;
@@@ -727,7 -741,7 +730,7 @@@ int mlx5_mr_cache_init(struct mlx5_ib_d
                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
                    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
 -                  mlx5_ib_can_load_pas_with_umr(dev, 0))
 +                  mlx5r_umr_can_load_pas(dev, 0))
                        ent->limit = dev->mdev->profile.mr_cache[i].limit;
                else
                        ent->limit = 0;
@@@ -834,6 -848,49 +837,6 @@@ static int mr_cache_max_order(struct ml
        return MLX5_MAX_UMR_SHIFT;
  }
  
 -static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 -{
 -      struct mlx5_ib_umr_context *context =
 -              container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
 -
 -      context->status = wc->status;
 -      complete(&context->done);
 -}
 -
 -static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
 -{
 -      context->cqe.done = mlx5_ib_umr_done;
 -      context->status = -1;
 -      init_completion(&context->done);
 -}
 -
 -static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 -                                struct mlx5_umr_wr *umrwr)
 -{
 -      struct umr_common *umrc = &dev->umrc;
 -      const struct ib_send_wr *bad;
 -      int err;
 -      struct mlx5_ib_umr_context umr_context;
 -
 -      mlx5_ib_init_umr_context(&umr_context);
 -      umrwr->wr.wr_cqe = &umr_context.cqe;
 -
 -      down(&umrc->sem);
 -      err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
 -      if (err) {
 -              mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
 -      } else {
 -              wait_for_completion(&umr_context.done);
 -              if (umr_context.status != IB_WC_SUCCESS) {
 -                      mlx5_ib_warn(dev, "reg umr failed (%u)\n",
 -                                   umr_context.status);
 -                      err = -EFAULT;
 -              }
 -      }
 -      up(&umrc->sem);
 -      return err;
 -}
 -
  static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
                                                      unsigned int order)
  {
@@@ -892,7 -949,7 +895,7 @@@ static struct mlx5_ib_mr *alloc_cacheab
         * cache then synchronously create an uncached one.
         */
        if (!ent || ent->limit == 0 ||
 -          !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
 +          !mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
                mutex_lock(&dev->slow_path_mutex);
                mr = reg_create(pd, umem, iova, access_flags, page_size, false);
                mutex_unlock(&dev->slow_path_mutex);
        return mr;
  }
  
 -#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
 -                          MLX5_UMR_MTT_ALIGNMENT)
 -#define MLX5_SPARE_UMR_CHUNK 0x10000
 -
 -/*
 - * Allocate a temporary buffer to hold the per-page information to transfer to
 - * HW. For efficiency this should be as large as it can be, but buffer
 - * allocation failure is not allowed, so try smaller sizes.
 - */
 -static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
 -{
 -      const size_t xlt_chunk_align =
 -              MLX5_UMR_MTT_ALIGNMENT / ent_size;
 -      size_t size;
 -      void *res = NULL;
 -
 -      static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
 -
 -      /*
 -       * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
 -       * allocation can't trigger any kind of reclaim.
 -       */
 -      might_sleep();
 -
 -      gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
 -
 -      /*
 -       * If the system already has a suitable high order page then just use
 -       * that, but don't try hard to create one. This max is about 1M, so a
 -       * free x86 huge page will satisfy it.
 -       */
 -      size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
 -                   MLX5_MAX_UMR_CHUNK);
 -      *nents = size / ent_size;
 -      res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
 -                                     get_order(size));
 -      if (res)
 -              return res;
 -
 -      if (size > MLX5_SPARE_UMR_CHUNK) {
 -              size = MLX5_SPARE_UMR_CHUNK;
 -              *nents = size / ent_size;
 -              res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
 -                                             get_order(size));
 -              if (res)
 -                      return res;
 -      }
 -
 -      *nents = PAGE_SIZE / ent_size;
 -      res = (void *)__get_free_page(gfp_mask);
 -      if (res)
 -              return res;
 -
 -      mutex_lock(&xlt_emergency_page_mutex);
 -      memset(xlt_emergency_page, 0, PAGE_SIZE);
 -      return xlt_emergency_page;
 -}
 -
 -static void mlx5_ib_free_xlt(void *xlt, size_t length)
 -{
 -      if (xlt == xlt_emergency_page) {
 -              mutex_unlock(&xlt_emergency_page_mutex);
 -              return;
 -      }
 -
 -      free_pages((unsigned long)xlt, get_order(length));
 -}
 -
 -/*
 - * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
 - * submission.
 - */
 -static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
 -                                 struct mlx5_umr_wr *wr, struct ib_sge *sg,
 -                                 size_t nents, size_t ent_size,
 -                                 unsigned int flags)
 -{
 -      struct mlx5_ib_dev *dev = mr_to_mdev(mr);
 -      struct device *ddev = &dev->mdev->pdev->dev;
 -      dma_addr_t dma;
 -      void *xlt;
 -
 -      xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
 -                              flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
 -                                                               GFP_KERNEL);
 -      sg->length = nents * ent_size;
 -      dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
 -      if (dma_mapping_error(ddev, dma)) {
 -              mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
 -              mlx5_ib_free_xlt(xlt, sg->length);
 -              return NULL;
 -      }
 -      sg->addr = dma;
 -      sg->lkey = dev->umrc.pd->local_dma_lkey;
 -
 -      memset(wr, 0, sizeof(*wr));
 -      wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
 -      if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
 -              wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 -      wr->wr.sg_list = sg;
 -      wr->wr.num_sge = 1;
 -      wr->wr.opcode = MLX5_IB_WR_UMR;
 -      wr->pd = mr->ibmr.pd;
 -      wr->mkey = mr->mmkey.key;
 -      wr->length = mr->ibmr.length;
 -      wr->virt_addr = mr->ibmr.iova;
 -      wr->access_flags = mr->access_flags;
 -      wr->page_shift = mr->page_shift;
 -      wr->xlt_size = sg->length;
 -      return xlt;
 -}
 -
 -static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
 -                                 struct ib_sge *sg)
 -{
 -      struct device *ddev = &dev->mdev->pdev->dev;
 -
 -      dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
 -      mlx5_ib_free_xlt(xlt, sg->length);
 -}
 -
 -static unsigned int xlt_wr_final_send_flags(unsigned int flags)
 -{
 -      unsigned int res = 0;
 -
 -      if (flags & MLX5_IB_UPD_XLT_ENABLE)
 -              res |= MLX5_IB_SEND_UMR_ENABLE_MR |
 -                     MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
 -                     MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
 -      if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
 -              res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
 -      if (flags & MLX5_IB_UPD_XLT_ADDR)
 -              res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
 -      return res;
 -}
 -
 -int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 -                     int page_shift, int flags)
 -{
 -      struct mlx5_ib_dev *dev = mr_to_mdev(mr);
 -      struct device *ddev = &dev->mdev->pdev->dev;
 -      void *xlt;
 -      struct mlx5_umr_wr wr;
 -      struct ib_sge sg;
 -      int err = 0;
 -      int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
 -                             ? sizeof(struct mlx5_klm)
 -                             : sizeof(struct mlx5_mtt);
 -      const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
 -      const int page_mask = page_align - 1;
 -      size_t pages_mapped = 0;
 -      size_t pages_to_map = 0;
 -      size_t pages_iter;
 -      size_t size_to_map = 0;
 -      size_t orig_sg_length;
 -
 -      if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
 -          !umr_can_use_indirect_mkey(dev))
 -              return -EPERM;
 -
 -      if (WARN_ON(!mr->umem->is_odp))
 -              return -EINVAL;
 -
 -      /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 -       * so we need to align the offset and length accordingly
 -       */
 -      if (idx & page_mask) {
 -              npages += idx & page_mask;
 -              idx &= ~page_mask;
 -      }
 -      pages_to_map = ALIGN(npages, page_align);
 -
 -      xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
 -      if (!xlt)
 -              return -ENOMEM;
 -      pages_iter = sg.length / desc_size;
 -      orig_sg_length = sg.length;
 -
 -      if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
 -              struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
 -              size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
 -
 -              pages_to_map = min_t(size_t, pages_to_map, max_pages);
 -      }
 -
 -      wr.page_shift = page_shift;
 -
 -      for (pages_mapped = 0;
 -           pages_mapped < pages_to_map && !err;
 -           pages_mapped += pages_iter, idx += pages_iter) {
 -              npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
 -              size_to_map = npages * desc_size;
 -              dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
 -                                      DMA_TO_DEVICE);
 -              mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
 -              dma_sync_single_for_device(ddev, sg.addr, sg.length,
 -                                         DMA_TO_DEVICE);
 -
 -              sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
 -
 -              if (pages_mapped + pages_iter >= pages_to_map)
 -                      wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
 -
 -              wr.offset = idx * desc_size;
 -              wr.xlt_size = sg.length;
 -
 -              err = mlx5_ib_post_send_wait(dev, &wr);
 -      }
 -      sg.length = orig_sg_length;
 -      mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
 -      return err;
 -}
 -
 -/*
 - * Send the DMA list to the HW for a normal MR using UMR.
 - * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
 - * flag may be used.
 - */
 -int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
 -{
 -      struct mlx5_ib_dev *dev = mr_to_mdev(mr);
 -      struct device *ddev = &dev->mdev->pdev->dev;
 -      struct ib_block_iter biter;
 -      struct mlx5_mtt *cur_mtt;
 -      struct mlx5_umr_wr wr;
 -      size_t orig_sg_length;
 -      struct mlx5_mtt *mtt;
 -      size_t final_size;
 -      struct ib_sge sg;
 -      int err = 0;
 -
 -      if (WARN_ON(mr->umem->is_odp))
 -              return -EINVAL;
 -
 -      mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
 -                                  ib_umem_num_dma_blocks(mr->umem,
 -                                                         1 << mr->page_shift),
 -                                  sizeof(*mtt), flags);
 -      if (!mtt)
 -              return -ENOMEM;
 -      orig_sg_length = sg.length;
 -
 -      cur_mtt = mtt;
 -      rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter,
 -                           mr->umem->sgt_append.sgt.nents,
 -                           BIT(mr->page_shift)) {
 -              if (cur_mtt == (void *)mtt + sg.length) {
 -                      dma_sync_single_for_device(ddev, sg.addr, sg.length,
 -                                                 DMA_TO_DEVICE);
 -                      err = mlx5_ib_post_send_wait(dev, &wr);
 -                      if (err)
 -                              goto err;
 -                      dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
 -                                              DMA_TO_DEVICE);
 -                      wr.offset += sg.length;
 -                      cur_mtt = mtt;
 -              }
 -
 -              cur_mtt->ptag =
 -                      cpu_to_be64(rdma_block_iter_dma_address(&biter) |
 -                                  MLX5_IB_MTT_PRESENT);
 -
 -              if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
 -                      cur_mtt->ptag = 0;
 -
 -              cur_mtt++;
 -      }
 -
 -      final_size = (void *)cur_mtt - (void *)mtt;
 -      sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
 -      memset(cur_mtt, 0, sg.length - final_size);
 -      wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
 -      wr.xlt_size = sg.length;
 -
 -      dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
 -      err = mlx5_ib_post_send_wait(dev, &wr);
 -
 -err:
 -      sg.length = orig_sg_length;
 -      mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
 -      return err;
 -}
 -
  /*
   * If ibmr is NULL it will be allocated by reg_create.
   * Else, the given ibmr will be used.
@@@ -1101,7 -1441,7 +1104,7 @@@ static struct ib_mr *create_real_mr(str
        bool xlt_with_umr;
        int err;
  
 -      xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
 +      xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
        if (xlt_with_umr) {
                mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
        } else {
                 * configured properly but left disabled. It is safe to go ahead
                 * and configure it again via UMR while enabling it.
                 */
 -              err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
 +              err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
                if (err) {
                        mlx5_ib_dereg_mr(&mr->ibmr, NULL);
                        return ERR_PTR(err);
@@@ -1164,7 -1504,7 +1167,7 @@@ static struct ib_mr *create_user_odp_mr
        }
  
        /* ODP requires xlt update via umr to work. */
 -      if (!mlx5_ib_can_load_pas_with_umr(dev, length))
 +      if (!mlx5r_umr_can_load_pas(dev, length))
                return ERR_PTR(-EINVAL);
  
        odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
@@@ -1226,7 -1566,7 +1229,7 @@@ static void mlx5_ib_dmabuf_invalidate_c
        if (!umem_dmabuf->sgt)
                return;
  
 -      mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
 +      mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
        ib_umem_dmabuf_unmap_pages(umem_dmabuf);
  }
  
@@@ -1254,7 -1594,7 +1257,7 @@@ struct ib_mr *mlx5_ib_reg_user_mr_dmabu
                    offset, virt_addr, length, fd, access_flags);
  
        /* dmabuf requires xlt update via umr to work. */
 -      if (!mlx5_ib_can_load_pas_with_umr(dev, length))
 +      if (!mlx5r_umr_can_load_pas(dev, length))
                return ERR_PTR(-EINVAL);
  
        umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
@@@ -1291,6 -1631,31 +1294,6 @@@ err_dereg_mr
        return ERR_PTR(err);
  }
  
 -/**
 - * revoke_mr - Fence all DMA on the MR
 - * @mr: The MR to fence
 - *
 - * Upon return the NIC will not be doing any DMA to the pages under the MR,
 - * and any DMA in progress will be completed. Failure of this function
 - * indicates the HW has failed catastrophically.
 - */
 -static int revoke_mr(struct mlx5_ib_mr *mr)
 -{
 -      struct mlx5_umr_wr umrwr = {};
 -
 -      if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 -              return 0;
 -
 -      umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
 -                            MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
 -      umrwr.wr.opcode = MLX5_IB_WR_UMR;
 -      umrwr.pd = mr_to_mdev(mr)->umrc.pd;
 -      umrwr.mkey = mr->mmkey.key;
 -      umrwr.ignore_free_state = 1;
 -
 -      return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
 -}
 -
  /*
   * True if the change in access flags can be done via UMR, only some access
   * flags can be updated.
@@@ -1304,8 -1669,32 +1307,8 @@@ static bool can_use_umr_rereg_access(st
        if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
                      IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
                return false;
 -      return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
 -                                           target_access_flags);
 -}
 -
 -static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 -                             int access_flags)
 -{
 -      struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 -      struct mlx5_umr_wr umrwr = {
 -              .wr = {
 -                      .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
 -                                    MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
 -                      .opcode = MLX5_IB_WR_UMR,
 -              },
 -              .mkey = mr->mmkey.key,
 -              .pd = pd,
 -              .access_flags = access_flags,
 -      };
 -      int err;
 -
 -      err = mlx5_ib_post_send_wait(dev, &umrwr);
 -      if (err)
 -              return err;
 -
 -      mr->access_flags = access_flags;
 -      return 0;
 +      return mlx5r_umr_can_reconfig(dev, current_access_flags,
 +                                    target_access_flags);
  }
  
  static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
        /* We only track the allocated sizes of MRs from the cache */
        if (!mr->cache_ent)
                return false;
 -      if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
 +      if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
                return false;
  
        *page_size =
@@@ -1343,7 -1732,7 +1346,7 @@@ static int umr_rereg_pas(struct mlx5_ib
         * with it. This ensure the change is atomic relative to any use of the
         * MR.
         */
 -      err = revoke_mr(mr);
 +      err = mlx5r_umr_revoke_mr(mr);
        if (err)
                return err;
  
        mr->ibmr.length = new_umem->length;
        mr->page_shift = order_base_2(page_size);
        mr->umem = new_umem;
 -      err = mlx5_ib_update_mr_pas(mr, upd_flags);
 +      err = mlx5r_umr_update_mr_pas(mr, upd_flags);
        if (err) {
                /*
                 * The MR is revoked at this point so there is no issue to free
@@@ -1408,8 -1797,7 +1411,8 @@@ struct ib_mr *mlx5_ib_rereg_user_mr(str
                /* Fast path for PD/access change */
                if (can_use_umr_rereg_access(dev, mr->access_flags,
                                             new_access_flags)) {
 -                      err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
 +                      err = mlx5r_umr_rereg_pd_access(mr, new_pd,
 +                                                      new_access_flags);
                        if (err)
                                return ERR_PTR(err);
                        return NULL;
                 * Only one active MR can refer to a umem at one time, revoke
                 * the old MR before assigning the umem to the new one.
                 */
 -              err = revoke_mr(mr);
 +              err = mlx5r_umr_revoke_mr(mr);
                if (err)
                        return ERR_PTR(err);
                umem = mr->umem;
@@@ -1567,7 -1955,7 +1570,7 @@@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr
  
        /* Stop DMA */
        if (mr->cache_ent) {
 -              if (revoke_mr(mr)) {
 +              if (mlx5r_umr_revoke_mr(mr)) {
                        spin_lock_irq(&mr->cache_ent->lock);
                        mr->cache_ent->total_mrs--;
                        spin_unlock_irq(&mr->cache_ent->lock);
diff --cc drivers/infiniband/sw/rdmavt/qp.c
@@@ -2775,7 -2775,7 +2775,7 @@@ void rvt_qp_iter(struct rvt_dev_info *r
  EXPORT_SYMBOL(rvt_qp_iter);
  
  /*
 - * This should be called with s_lock held.
 + * This should be called with s_lock and r_lock held.
   */
  void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
                       enum ib_wc_status status)
@@@ -3134,9 -3134,7 +3134,9 @@@ send_comp
        rvp->n_loop_pkts++;
  flush_send:
        sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 +      spin_lock(&sqp->r_lock);
        rvt_send_complete(sqp, wqe, send_status);
 +      spin_unlock(&sqp->r_lock);
        if (local_ops) {
                atomic_dec(&sqp->local_ops_pending);
                local_ops = 0;
@@@ -3190,11 -3188,13 +3190,15 @@@ serr
        spin_unlock_irqrestore(&qp->r_lock, flags);
  serr_no_r_lock:
        spin_lock_irqsave(&sqp->s_lock, flags);
 +      spin_lock(&sqp->r_lock);
        rvt_send_complete(sqp, wqe, send_status);
 +      spin_unlock(&sqp->r_lock);
        if (sqp->ibqp.qp_type == IB_QPT_RC) {
-               int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+               int lastwqe;
+               spin_lock(&sqp->r_lock);
+               lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+               spin_unlock(&sqp->r_lock);
  
                sqp->s_flags &= ~RVT_S_BUSY;
                spin_unlock_irqrestore(&sqp->s_lock, flags);
diff --cc drivers/infiniband/sw/rxe/rxe_resp.c
@@@ -277,6 -277,7 +277,6 @@@ static enum resp_states check_op_valid(
                break;
  
        case IB_QPT_UD:
 -      case IB_QPT_SMI:
        case IB_QPT_GSI:
                break;
  
@@@ -576,7 -577,8 +576,7 @@@ static enum resp_states process_atomic(
  
        qp->resp.atomic_orig = *vaddr;
  
 -      if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
 -          pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
 +      if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
                if (*vaddr == atmeth_comp(pkt))
                        *vaddr = atmeth_swap_add(pkt);
        } else {
@@@ -678,6 -680,11 +678,11 @@@ static struct resp_res *rxe_prepare_rea
   * It is assumed that the access permissions if originally good
   * are OK and the mappings to be unchanged.
   *
+  * TODO: If someone reregisters an MR to change its size or
+  * access permissions during the processing of an RDMA read
+  * we should kill the responder resource and complete the
+  * operation with an error.
+  *
   * Return: mr on success else NULL
   */
  static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
  
        if (rkey_is_mw(rkey)) {
                mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
-               if (!mw || mw->rkey != rkey)
+               if (!mw)
                        return NULL;
  
-               if (mw->state != RXE_MW_STATE_VALID) {
+               mr = mw->mr;
+               if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
+                   !mr || mr->state != RXE_MR_STATE_VALID) {
                        rxe_put(mw);
                        return NULL;
                }
  
-               mr = mw->mr;
+               rxe_get(mr);
                rxe_put(mw);
-       } else {
-               mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
-               if (!mr || mr->rkey != rkey)
-                       return NULL;
+               return mr;
        }
  
-       if (mr->state != RXE_MR_STATE_VALID) {
+       mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+       if (!mr)
+               return NULL;
+       if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
                rxe_put(mr);
                return NULL;
        }
@@@ -734,8 -745,14 +743,14 @@@ static enum resp_states read_reply(stru
        }
  
        if (res->state == rdatm_res_state_new) {
-               mr = qp->resp.mr;
-               qp->resp.mr = NULL;
+               if (!res->replay) {
+                       mr = qp->resp.mr;
+                       qp->resp.mr = NULL;
+               } else {
+                       mr = rxe_recheck_mr(qp, res->read.rkey);
+                       if (!mr)
+                               return RESPST_ERR_RKEY_VIOLATION;
+               }
  
                if (res->read.resid <= mtu)
                        opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
@@@ -817,6 -834,7 +832,6 @@@ static enum resp_states execute(struct 
  
        if (pkt->mask & RXE_SEND_MASK) {
                if (qp_type(qp) == IB_QPT_UD ||
 -                  qp_type(qp) == IB_QPT_SMI ||
                    qp_type(qp) == IB_QPT_GSI) {
                        if (skb->protocol == htons(ETH_P_IP)) {
                                memset(&hdr.reserved, 0,
@@@ -1247,8 -1265,7 +1262,8 @@@ int rxe_responder(void *arg
        struct rxe_pkt_info *pkt = NULL;
        int ret = 0;
  
 -      rxe_get(qp);
 +      if (!rxe_get(qp))
 +              return -EAGAIN;
  
        qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
  
diff --cc drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@@ -47,8 -47,9 +47,8 @@@
  #include "en_rep.h"
  #include "en_accel/ipsec.h"
  #include "en_accel/en_accel.h"
 -#include "en_accel/tls.h"
 -#include "accel/ipsec.h"
 -#include "accel/tls.h"
 +#include "en_accel/ktls.h"
 +#include "en_accel/ipsec_offload.h"
  #include "lib/vxlan.h"
  #include "lib/clock.h"
  #include "en/port.h"
@@@ -67,6 -68,7 +67,6 @@@
  #include "en/ptp.h"
  #include "qos.h"
  #include "en/trap.h"
 -#include "fpga/ipsec.h"
  
  bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
  {
@@@ -1034,6 -1036,9 +1034,6 @@@ int mlx5e_open_rq(struct mlx5e_params *
        if (err)
                goto err_destroy_rq;
  
 -      if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev))
 -              __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
 -
        if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
                __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
  
@@@ -1329,7 -1334,7 +1329,7 @@@ static int mlx5e_alloc_txqsq(struct mlx
        INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
        if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
                set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
 -      if (MLX5_IPSEC_DEV(c->priv->mdev))
 +      if (mlx5_ipsec_device_caps(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
        if (param->is_mpw)
                set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
@@@ -3859,6 -3864,10 +3859,10 @@@ static netdev_features_t mlx5e_fix_upli
        if (netdev->features & NETIF_F_NTUPLE)
                netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
  
+       features &= ~NETIF_F_GRO_HW;
+       if (netdev->features & NETIF_F_GRO_HW)
+               netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
        return features;
  }
  
@@@ -3891,6 -3900,25 +3895,25 @@@ static netdev_features_t mlx5e_fix_feat
                }
        }
  
+       if (params->xdp_prog) {
+               if (features & NETIF_F_LRO) {
+                       netdev_warn(netdev, "LRO is incompatible with XDP\n");
+                       features &= ~NETIF_F_LRO;
+               }
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+                       features &= ~NETIF_F_GRO_HW;
+               }
+       }
+       if (priv->xsk.refcnt) {
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+                                   priv->xsk.refcnt);
+                       features &= ~NETIF_F_GRO_HW;
+               }
+       }
        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
                features &= ~NETIF_F_RXHASH;
                if (netdev->features & NETIF_F_RXHASH)
@@@ -4466,6 -4494,12 +4489,6 @@@ static int mlx5e_xdp_allowed(struct mlx
                return -EINVAL;
        }
  
 -      if (mlx5_fpga_is_ipsec_device(priv->mdev)) {
 -              netdev_warn(netdev,
 -                          "XDP is not available on Innova cards with IPsec support\n");
 -              return -EINVAL;
 -      }
 -
        new_params = priv->channels.params;
        new_params.xdp_prog = prog;
  
@@@ -4839,10 -4873,6 +4862,6 @@@ static void mlx5e_build_nic_netdev(stru
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
        netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
  
-       if (!!MLX5_CAP_GEN(mdev, shampo) &&
-           mlx5e_check_fragmented_striding_rq_cap(mdev))
-               netdev->hw_features    |= NETIF_F_GRO_HW;
        if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
                netdev->hw_enc_features |= NETIF_F_HW_CSUM;
                netdev->hw_enc_features |= NETIF_F_TSO;
  
        mlx5e_set_netdev_dev_addr(netdev);
        mlx5e_ipsec_build_netdev(priv);
 -      mlx5e_tls_build_netdev(priv);
 +      mlx5e_ktls_build_netdev(priv);
  }
  
  void mlx5e_create_q_counters(struct mlx5e_priv *priv)
@@@ -4985,7 -5015,7 +5004,7 @@@ static int mlx5e_nic_init(struct mlx5_c
        if (err)
                mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
  
 -      err = mlx5e_tls_init(priv);
 +      err = mlx5e_ktls_init(priv);
        if (err)
                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
  
  static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
  {
        mlx5e_health_destroy_reporters(priv);
 -      mlx5e_tls_cleanup(priv);
 +      mlx5e_ktls_cleanup(priv);
        mlx5e_ipsec_cleanup(priv);
        mlx5e_fs_cleanup(priv);
  }
@@@ -5693,6 -5723,7 +5712,6 @@@ int mlx5e_init(void
  {
        int ret;
  
 -      mlx5e_ipsec_build_inverse_table();
        mlx5e_build_ptys2ethtool_map();
        ret = auxiliary_driver_register(&mlx5e_driver);
        if (ret)
diff --cc drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@@ -40,6 -40,8 +40,6 @@@
  #include "fs_cmd.h"
  #include "fs_ft_pool.h"
  #include "diag/fs_tracepoint.h"
 -#include "accel/ipsec.h"
 -#include "fpga/ipsec.h"
  
  #define INIT_TREE_NODE_ARRAY_SIZE(...)        (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
                                         sizeof(struct init_tree_node))
@@@ -186,18 -188,24 +186,18 @@@ static struct init_tree_node 
  
  static struct init_tree_node egress_root_fs = {
        .type = FS_TYPE_NAMESPACE,
 -#ifdef CONFIG_MLX5_IPSEC
        .ar_size = 2,
 -#else
 -      .ar_size = 1,
 -#endif
        .children = (struct init_tree_node[]) {
                ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
                         FS_CHAINING_CAPS_EGRESS,
                         ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
                                ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
                                                  BY_PASS_PRIO_NUM_LEVELS))),
 -#ifdef CONFIG_MLX5_IPSEC
                ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0,
                         FS_CHAINING_CAPS_EGRESS,
                         ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
                                ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
                                                  KERNEL_TX_IPSEC_NUM_LEVELS))),
 -#endif
        }
  };
  
@@@ -2511,6 -2519,10 +2511,6 @@@ static struct mlx5_flow_root_namespac
        struct mlx5_flow_root_namespace *root_ns;
        struct mlx5_flow_namespace *ns;
  
 -      if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
 -          (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
 -              cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
 -
        /* Create the root namespace */
        root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
        if (!root_ns)
@@@ -2651,28 -2663,6 +2651,6 @@@ static void cleanup_root_ns(struct mlx5
        clean_tree(&root_ns->ns.node);
  }
  
- void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
- {
-       struct mlx5_flow_steering *steering = dev->priv.steering;
-       cleanup_root_ns(steering->root_ns);
-       cleanup_root_ns(steering->fdb_root_ns);
-       steering->fdb_root_ns = NULL;
-       kfree(steering->fdb_sub_ns);
-       steering->fdb_sub_ns = NULL;
-       cleanup_root_ns(steering->port_sel_root_ns);
-       cleanup_root_ns(steering->sniffer_rx_root_ns);
-       cleanup_root_ns(steering->sniffer_tx_root_ns);
-       cleanup_root_ns(steering->rdma_rx_root_ns);
-       cleanup_root_ns(steering->rdma_tx_root_ns);
-       cleanup_root_ns(steering->egress_root_ns);
-       mlx5_cleanup_fc_stats(dev);
-       kmem_cache_destroy(steering->ftes_cache);
-       kmem_cache_destroy(steering->fgs_cache);
-       mlx5_ft_pool_destroy(dev);
-       kfree(steering);
- }
  static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
  {
        struct fs_prio *prio;
@@@ -3074,42 -3064,27 +3052,27 @@@ cleanup
        return err;
  }
  
- int mlx5_init_fs(struct mlx5_core_dev *dev)
+ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
  {
-       struct mlx5_flow_steering *steering;
-       int err = 0;
-       err = mlx5_init_fc_stats(dev);
-       if (err)
-               return err;
-       err = mlx5_ft_pool_init(dev);
-       if (err)
-               return err;
-       steering = kzalloc(sizeof(*steering), GFP_KERNEL);
-       if (!steering) {
-               err = -ENOMEM;
-               goto err;
-       }
-       steering->dev = dev;
-       dev->priv.steering = steering;
+       struct mlx5_flow_steering *steering = dev->priv.steering;
  
-       if (mlx5_fs_dr_is_supported(dev))
-               steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
-       else
-               steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+       cleanup_root_ns(steering->root_ns);
+       cleanup_root_ns(steering->fdb_root_ns);
+       steering->fdb_root_ns = NULL;
+       kfree(steering->fdb_sub_ns);
+       steering->fdb_sub_ns = NULL;
+       cleanup_root_ns(steering->port_sel_root_ns);
+       cleanup_root_ns(steering->sniffer_rx_root_ns);
+       cleanup_root_ns(steering->sniffer_tx_root_ns);
+       cleanup_root_ns(steering->rdma_rx_root_ns);
+       cleanup_root_ns(steering->rdma_tx_root_ns);
+       cleanup_root_ns(steering->egress_root_ns);
+ }
  
-       steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
-                                               sizeof(struct mlx5_flow_group), 0,
-                                               0, NULL);
-       steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
-                                                0, NULL);
-       if (!steering->ftes_cache || !steering->fgs_cache) {
-               err = -ENOMEM;
-               goto err;
-       }
+ int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_flow_steering *steering = dev->priv.steering;
+       int err = 0;
  
        if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
              (MLX5_CAP_GEN(dev, nic_flow_table))) ||
                        goto err;
        }
  
 -      if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE ||
 -          MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
 +      if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
                err = init_egress_root_ns(steering);
                if (err)
                        goto err;
        }
  
        return 0;
+ err:
+       mlx5_fs_core_cleanup(dev);
+       return err;
+ }
+ void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_flow_steering *steering = dev->priv.steering;
+       kmem_cache_destroy(steering->ftes_cache);
+       kmem_cache_destroy(steering->fgs_cache);
+       kfree(steering);
+       mlx5_ft_pool_destroy(dev);
+       mlx5_cleanup_fc_stats(dev);
+ }
+ int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_flow_steering *steering;
+       int err = 0;
+       err = mlx5_init_fc_stats(dev);
+       if (err)
+               return err;
+       err = mlx5_ft_pool_init(dev);
+       if (err)
+               goto err;
+       steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+       if (!steering) {
+               err = -ENOMEM;
+               goto err;
+       }
+       steering->dev = dev;
+       dev->priv.steering = steering;
+       if (mlx5_fs_dr_is_supported(dev))
+               steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+       else
+               steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+       steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+                                               sizeof(struct mlx5_flow_group), 0,
+                                               0, NULL);
+       steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+                                                0, NULL);
+       if (!steering->ftes_cache || !steering->fgs_cache) {
+               err = -ENOMEM;
+               goto err;
+       }
+       return 0;
  err:
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_free(dev);
        return err;
  }
  
diff --cc drivers/net/ethernet/mellanox/mlx5/core/main.c
@@@ -62,7 -62,9 +62,7 @@@
  #include "lib/mlx5.h"
  #include "lib/tout.h"
  #include "fpga/core.h"
 -#include "fpga/ipsec.h"
 -#include "accel/ipsec.h"
 -#include "accel/tls.h"
 +#include "en_accel/ipsec_offload.h"
  #include "lib/clock.h"
  #include "lib/vxlan.h"
  #include "lib/geneve.h"
@@@ -936,6 -938,12 +936,12 @@@ static int mlx5_init_once(struct mlx5_c
                goto err_sf_table_cleanup;
        }
  
+       err = mlx5_fs_core_alloc(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc flow steering\n");
+               goto err_fs;
+       }
        dev->dm = mlx5_dm_create(dev);
        if (IS_ERR(dev->dm))
                mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
  
        return 0;
  
+ err_fs:
+       mlx5_sf_table_cleanup(dev);
  err_sf_table_cleanup:
        mlx5_sf_hw_table_cleanup(dev);
  err_sf_hw_table_cleanup:
@@@ -983,6 -993,7 +991,7 @@@ static void mlx5_cleanup_once(struct ml
        mlx5_hv_vhca_destroy(dev->hv_vhca);
        mlx5_fw_tracer_destroy(dev->tracer);
        mlx5_dm_cleanup(dev);
+       mlx5_fs_core_free(dev);
        mlx5_sf_table_cleanup(dev);
        mlx5_sf_hw_table_cleanup(dev);
        mlx5_vhca_event_cleanup(dev);
@@@ -1181,7 -1192,15 +1190,7 @@@ static int mlx5_load(struct mlx5_core_d
                goto err_fpga_start;
        }
  
-       err = mlx5_init_fs(dev);
 -      mlx5_accel_ipsec_init(dev);
 -
 -      err = mlx5_accel_tls_init(dev);
 -      if (err) {
 -              mlx5_core_err(dev, "TLS device start failed %d\n", err);
 -              goto err_tls_start;
 -      }
 -
+       err = mlx5_fs_core_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init flow steering\n");
                goto err_fs;
@@@ -1226,8 -1245,11 +1235,8 @@@ err_ec
  err_vhca:
        mlx5_vhca_event_stop(dev);
  err_set_hca:
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
  err_fs:
 -      mlx5_accel_tls_cleanup(dev);
 -err_tls_start:
 -      mlx5_accel_ipsec_cleanup(dev);
        mlx5_fpga_device_stop(dev);
  err_fpga_start:
        mlx5_rsc_dump_cleanup(dev);
@@@ -1252,7 -1274,9 +1261,7 @@@ static void mlx5_unload(struct mlx5_cor
        mlx5_ec_cleanup(dev);
        mlx5_sf_hw_table_destroy(dev);
        mlx5_vhca_event_stop(dev);
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
 -      mlx5_accel_ipsec_cleanup(dev);
 -      mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
        mlx5_rsc_dump_cleanup(dev);
        mlx5_hv_vhca_cleanup(dev->hv_vhca);
@@@ -1603,6 -1627,10 +1612,10 @@@ static void remove_one(struct pci_dev *
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct devlink *devlink = priv_to_devlink(dev);
  
+       /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
+        * fw_reset before unregistering the devlink.
+        */
+       mlx5_drain_fw_reset(dev);
        devlink_unregister(devlink);
        mlx5_sriov_disable(pdev);
        mlx5_crdump_disable(dev);
@@@ -1932,6 -1960,7 +1945,6 @@@ static int __init init(void
        get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
  
        mlx5_core_verify_params();
 -      mlx5_fpga_ipsec_build_fs_cmds();
        mlx5_register_debugfs();
  
        err = pci_register_driver(&mlx5_core_driver);