{
LASSERT(!fpo->fpo_map_count);
- if (fpo->fmr.fpo_fmr_pool)
- ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
+ if (fpo->fpo_is_fmr) {
+ if (fpo->fmr.fpo_fmr_pool)
+ ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
+ } else {
+ struct kib_fast_reg_descriptor *frd, *tmp;
+ int i = 0;
+
+ list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+ frd_list) {
+ list_del(&frd->frd_list);
+ ib_dereg_mr(frd->frd_mr);
+ LIBCFS_FREE(frd, sizeof(*frd));
+ i++;
+ }
+ if (i < fpo->fast_reg.fpo_pool_size)
+ CERROR("FastReg pool still has %d regions registered\n",
+ fpo->fast_reg.fpo_pool_size - i);
+ }
if (fpo->fpo_hdev)
kiblnd_hdev_decref(fpo->fpo_hdev);
return rc;
}
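+/* Pre-allocate fps_pool_size FastReg descriptors for this pool. Each
+ * descriptor wraps one MR from ib_alloc_mr(), sized to cover
+ * LNET_MAX_PAYLOAD / PAGE_SIZE pages; on failure every descriptor
+ * allocated so far is torn down again. */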
+static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
+{
+ struct kib_fast_reg_descriptor *frd, *tmp;
+ int i, rc;
+
+ INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
+ fpo->fast_reg.fpo_pool_size = 0;
+ for (i = 0; i < fps->fps_pool_size; i++) {
+ LIBCFS_CPT_ALLOC(frd, lnet_cpt_table(), fps->fps_cpt,
+ sizeof(*frd));
+ if (!frd) {
+ CERROR("Failed to allocate a new fast_reg descriptor\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd,
+ IB_MR_TYPE_MEM_REG,
+ LNET_MAX_PAYLOAD / PAGE_SIZE);
+ if (IS_ERR(frd->frd_mr)) {
+ rc = PTR_ERR(frd->frd_mr);
+ CERROR("Failed to allocate ib_alloc_mr: %d\n", rc);
+ frd->frd_mr = NULL;
+ goto out_middle;
+ }
+
+ frd->frd_valid = true;
+
+ list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+ fpo->fast_reg.fpo_pool_size++;
+ }
+
+ return 0;
+
+out_middle:
+ if (frd->frd_mr)
+ ib_dereg_mr(frd->frd_mr);
+ LIBCFS_FREE(frd, sizeof(*frd));
+
+out:
+ list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+ frd_list) {
+ list_del(&frd->frd_list);
+ ib_dereg_mr(frd->frd_mr);
+ LIBCFS_FREE(frd, sizeof(*frd));
+ }
+
+ return rc;
+}
+
static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps,
kib_fmr_pool_t **pp_fpo)
{
kib_dev_t *dev = fps->fps_net->ibn_dev;
+ struct ib_device_attr *dev_attr;
kib_fmr_pool_t *fpo;
int rc;
return -ENOMEM;
fpo->fpo_hdev = kiblnd_current_hdev(dev);
+ dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs;
- /* Check for FMR support */
+ /* Check for FMR or FastReg support */
+ fpo->fpo_is_fmr = 0;
if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
LCONSOLE_INFO("Using FMR for registration\n");
+ fpo->fpo_is_fmr = 1;
+ } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ LCONSOLE_INFO("Using FastReg for registration\n");
} else {
rc = -ENOSYS;
- LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs, can't register memory\n");
+ LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs or FastRegs, can't register memory\n");
goto out_fpo;
}
- rc = kiblnd_alloc_fmr_pool(fps, fpo);
+ if (fpo->fpo_is_fmr)
+ rc = kiblnd_alloc_fmr_pool(fps, fpo);
+ else
+ rc = kiblnd_alloc_freg_pool(fps, fpo);
if (rc)
goto out_fpo;
return cfs_time_aftereq(now, fpo->fpo_deadline);
}
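+/* Flatten the RDMA fragments in @rd into the tx page array consumed by
+ * ib_fmr_pool_map_phys() and return the number of pages filled in. Only
+ * the FMR path needs this; FastReg maps the scatterlist directly. */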
+static int
+kiblnd_map_tx_pages(kib_tx_t *tx, kib_rdma_desc_t *rd)
+{
+ __u64 *pages = tx->tx_pages;
+ kib_hca_dev_t *hdev;
+ int npages;
+ int size;
+ int i;
+
+ hdev = tx->tx_pool->tpo_hdev;
+
+ for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
+ for (size = 0; size < rd->rd_frags[i].rf_nob;
+ size += hdev->ibh_page_size) {
+ pages[npages++] = (rd->rd_frags[i].rf_addr &
+ hdev->ibh_page_mask) + size;
+ }
+ }
+
+ return npages;
+}
+
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
{
LIST_HEAD(zombies);
return;
fps = fpo->fpo_owner;
- if (fmr->fmr_pfmr) {
- rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
- LASSERT(!rc);
- fmr->fmr_pfmr = NULL;
- }
+ if (fpo->fpo_is_fmr) {
+ if (fmr->fmr_pfmr) {
+ rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
+ LASSERT(!rc);
+ fmr->fmr_pfmr = NULL;
+ }
- if (status) {
- rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
- LASSERT(!rc);
- }
+ if (status) {
+ rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
+ LASSERT(!rc);
+ }
+ } else {
+ struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
+
+ if (frd) {
+ frd->frd_valid = false;
+ spin_lock(&fps->fps_lock);
+ list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+ spin_unlock(&fps->fps_lock);
+ fmr->fmr_frd = NULL;
+ }
+ }
fmr->fmr_pool = NULL;
spin_lock(&fps->fps_lock);
kiblnd_destroy_fmr_pool_list(&zombies);
}
-int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
- __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr)
+int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx,
+ kib_rdma_desc_t *rd, __u32 nob, __u64 iov,
+ kib_fmr_t *fmr)
{
- struct ib_pool_fmr *pfmr;
+ __u64 *pages = tx->tx_pages;
+ bool is_rx = (rd != tx->tx_rd);
+ bool tx_pages_mapped = false;
kib_fmr_pool_t *fpo;
+ int npages = 0;
__u64 version;
int rc;
list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
fpo->fpo_map_count++;
- spin_unlock(&fps->fps_lock);
- pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
- pages, npages, iov);
- if (likely(!IS_ERR(pfmr))) {
- fmr->fmr_key = is_rx ? pfmr->fmr->rkey :
- pfmr->fmr->lkey;
- fmr->fmr_pfmr = pfmr;
- fmr->fmr_pool = fpo;
- return 0;
+ if (fpo->fpo_is_fmr) {
+ struct ib_pool_fmr *pfmr;
+
+ spin_unlock(&fps->fps_lock);
+
+ if (!tx_pages_mapped) {
+ npages = kiblnd_map_tx_pages(tx, rd);
+ tx_pages_mapped = true;
+ }
+
+ pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
+ pages, npages, iov);
+ if (likely(!IS_ERR(pfmr))) {
+ fmr->fmr_key = is_rx ? pfmr->fmr->rkey :
+ pfmr->fmr->lkey;
+ fmr->fmr_frd = NULL;
+ fmr->fmr_pfmr = pfmr;
+ fmr->fmr_pool = fpo;
+ return 0;
+ }
+ rc = PTR_ERR(pfmr);
+ } else {
+ if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
+ struct kib_fast_reg_descriptor *frd;
+ struct ib_reg_wr *wr;
+ struct ib_mr *mr;
+ int n;
+
+ frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
+ struct kib_fast_reg_descriptor,
+ frd_list);
+ list_del(&frd->frd_list);
+ spin_unlock(&fps->fps_lock);
+
+ mr = frd->frd_mr;
+
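+ /* The descriptor was handed back by a previous unmap and its MR is
+ * still registered: prepare a LOCAL_INV WR to be chained ahead of the
+ * REG_MR WR at post time, and bump the key for the new registration. */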
+ if (!frd->frd_valid) {
+ __u32 key = is_rx ? mr->rkey : mr->lkey;
+ struct ib_send_wr *inv_wr;
+
+ inv_wr = &frd->frd_inv_wr;
+ memset(inv_wr, 0, sizeof(*inv_wr));
+ inv_wr->opcode = IB_WR_LOCAL_INV;
+ inv_wr->wr_id = IBLND_WID_MR;
+ inv_wr->ex.invalidate_rkey = key;
+
+ /* Bump the key */
+ key = ib_inc_rkey(key);
+ ib_update_fast_reg_key(mr, key);
+ }
+
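+ /* Map the tx fragment scatterlist into the MR; every fragment must
+ * be covered for the registration to be usable. */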
+ n = ib_map_mr_sg(mr, tx->tx_frags,
+ tx->tx_nfrags, PAGE_SIZE);
+ if (unlikely(n != tx->tx_nfrags)) {
+ CERROR("Failed to map mr %d/%d elements\n",
+ n, tx->tx_nfrags);
+ return n < 0 ? n : -EINVAL;
+ }
+
+ mr->iova = iov;
+
+ /* Prepare FastReg WR */
+ wr = &frd->frd_fastreg_wr;
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.opcode = IB_WR_REG_MR;
+ wr->wr.wr_id = IBLND_WID_MR;
+ wr->wr.num_sge = 0;
+ wr->wr.send_flags = 0;
+ wr->mr = mr;
+ wr->key = is_rx ? mr->rkey : mr->lkey;
+ wr->access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE);
+
+ fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
+ fmr->fmr_frd = frd;
+ fmr->fmr_pfmr = NULL;
+ fmr->fmr_pool = fpo;
+ return 0;
+ }
+ spin_unlock(&fps->fps_lock);
+ rc = -EBUSY;
}
- rc = PTR_ERR(pfmr);
spin_lock(&fps->fps_lock);
fpo->fpo_map_count--;
/* failed to allocate */
} kib_fmr_poolset_t;
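+/* One pre-allocated FastReg MR plus the work requests used to register
+ * and invalidate it. Idle descriptors sit on the owning pool's
+ * fpo_pool_list; frd_valid is cleared when a mapping is released so the
+ * MR gets invalidated before its next use. */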
+struct kib_fast_reg_descriptor { /* For fast registration */
+ struct list_head frd_list;
+ struct ib_send_wr frd_inv_wr;
+ struct ib_reg_wr frd_fastreg_wr;
+ struct ib_mr *frd_mr;
+ bool frd_valid;
+};
+
typedef struct {
struct list_head fpo_list; /* chain on pool list */
struct kib_hca_dev *fpo_hdev; /* device for this pool */
struct {
struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
} fmr;
+ struct { /* For fast registration */
+ struct list_head fpo_pool_list; /* idle FastReg descriptors */
+ int fpo_pool_size; /* # of descriptors allocated */
+ } fast_reg;
};
unsigned long fpo_deadline; /* deadline of this pool */
int fpo_failed; /* fmr pool is failed */
int fpo_map_count; /* # of mapped FMR */
+ int fpo_is_fmr; /* 1 for FMR pools, 0 for FastReg */
} kib_fmr_pool_t;
typedef struct {
- kib_fmr_pool_t *fmr_pool; /* pool of FMR */
- struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */
- u32 fmr_key;
+ kib_fmr_pool_t *fmr_pool; /* pool of FMR */
+ struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */
+ struct kib_fast_reg_descriptor *fmr_frd;
+ u32 fmr_key;
} kib_fmr_t;
typedef struct kib_net {
void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
-int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
- __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr);
+int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx,
+ kib_rdma_desc_t *rd, __u32 nob, __u64 iov,
+ kib_fmr_t *fmr);
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
int kiblnd_tunables_init(void);
kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob)
{
kib_hca_dev_t *hdev;
- __u64 *pages = tx->tx_pages;
kib_fmr_poolset_t *fps;
- int npages;
- int size;
int cpt;
int rc;
- int i;
LASSERT(tx->tx_pool);
LASSERT(tx->tx_pool->tpo_pool.po_owner);
hdev = tx->tx_pool->tpo_hdev;
-
- for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
- for (size = 0; size < rd->rd_frags[i].rf_nob;
- size += hdev->ibh_page_size) {
- pages[npages++] = (rd->rd_frags[i].rf_addr &
- hdev->ibh_page_mask) + size;
- }
- }
-
cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
fps = net->ibn_fmr_ps[cpt];
- rc = kiblnd_fmr_pool_map(fps, pages, npages, nob, 0, (rd != tx->tx_rd),
- &tx->fmr);
+ rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr);
if (rc) {
- CERROR("Can't map %d pages: %d\n", npages, rc);
+ CERROR("Can't map %u bytes: %d\n", nob, rc);
return rc;
}
/* close_conn will launch failover */
rc = -ENETDOWN;
} else {
- struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
+ struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
+ struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
+ struct ib_send_wr *wrq = &tx->tx_wrq[0].wr;
+
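+ /* For FastReg, chain the (optional) LOCAL_INV WR and the REG_MR WR
+ * in front of the transmit WRs so they are executed first. */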
+ if (frd) {
+ if (!frd->frd_valid) {
+ wrq = &frd->frd_inv_wr;
+ wrq->next = &frd->frd_fastreg_wr.wr;
+ } else {
+ wrq = &frd->frd_fastreg_wr.wr;
+ }
+ frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr;
+ }
- LASSERTF(wrq->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
+ LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
"bad wr_id %llx, opc %d, flags %d, peer: %s\n",
- wrq->wr_id, wrq->opcode, wrq->send_flags,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- wrq = NULL;
- rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &wrq);
+ bad->wr_id, bad->opcode, bad->send_flags,
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ bad = NULL;
+ rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
}
conn->ibc_last_send = jiffies;
kib_msg_t *ibmsg = tx->tx_msg;
kib_rdma_desc_t *srcrd = tx->tx_rd;
struct ib_sge *sge = &tx->tx_sge[0];
- struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next;
+ struct ib_rdma_wr *wrq, *next;
int rc = resid;
int srcidx = 0;
int dstidx = 0;
default:
LBUG();
+ case IBLND_WID_MR:
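+ /* Completion of a REG_MR or LOCAL_INV WR posted ahead of a tx;
+ * nothing to clean up here, just note unexpected failures. */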
+ if (wc->status != IB_WC_SUCCESS &&
+ wc->status != IB_WC_WR_FLUSH_ERR)
+ CNETERR("FastReg failed: %d\n", wc->status);
+ break;
+
case IBLND_WID_RDMA:
/*
* We only get RDMA completion notification if it fails. All