RNBD_OPT_DEV_PATH = 1 << 2,
RNBD_OPT_ACCESS_MODE = 1 << 3,
RNBD_OPT_SESSNAME = 1 << 6,
+ RNBD_OPT_NR_POLL_QUEUES = 1 << 7,
};
static const unsigned int rnbd_opt_mandatory[] = {
};
static const match_table_t rnbd_opt_tokens = {
- {RNBD_OPT_PATH, "path=%s" },
- {RNBD_OPT_DEV_PATH, "device_path=%s"},
- {RNBD_OPT_DEST_PORT, "dest_port=%d" },
- {RNBD_OPT_ACCESS_MODE, "access_mode=%s"},
- {RNBD_OPT_SESSNAME, "sessname=%s" },
- {RNBD_OPT_ERR, NULL },
+ {RNBD_OPT_PATH, "path=%s" },
+ {RNBD_OPT_DEV_PATH, "device_path=%s" },
+ {RNBD_OPT_DEST_PORT, "dest_port=%d" },
+ {RNBD_OPT_ACCESS_MODE, "access_mode=%s" },
+ {RNBD_OPT_SESSNAME, "sessname=%s" },
+ {RNBD_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
+ {RNBD_OPT_ERR, NULL },
};
struct rnbd_map_options {
char *pathname;
u16 *dest_port;
enum rnbd_access_mode *access_mode;
+ u32 *nr_poll_queues;
};
static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
int opt_mask = 0;
int token;
int ret = -EINVAL;
- int i, dest_port;
+ int i, dest_port, nr_poll_queues;
int p_cnt = 0;
options = kstrdup(buf, GFP_KERNEL);
kfree(p);
break;
+ case RNBD_OPT_NR_POLL_QUEUES:
+ if (match_int(args, &nr_poll_queues) || nr_poll_queues < -1 ||
+ nr_poll_queues > (int)nr_cpu_ids) {
+ pr_err("bad nr_poll_queues parameter '%d'\n",
+ nr_poll_queues);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (nr_poll_queues == -1)
+ nr_poll_queues = nr_cpu_ids;
+ *opt->nr_poll_queues = nr_poll_queues;
+ break;
+
default:
pr_err("map_device: Unknown parameter or missing value '%s'\n",
p);
static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state);
+static ssize_t nr_poll_queues_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ struct rnbd_clt_dev *dev;
+
+ dev = container_of(kobj, struct rnbd_clt_dev, kobj);
+
+ return sysfs_emit(page, "%d\n", dev->nr_poll_queues);
+}
+
+static struct kobj_attribute rnbd_clt_nr_poll_queues =
+ __ATTR_RO(nr_poll_queues);
+
static ssize_t mapping_path_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
&rnbd_clt_state_attr.attr,
&rnbd_clt_session_attr.attr,
&rnbd_clt_access_mode.attr,
+ &rnbd_clt_nr_poll_queues.attr,
NULL,
};
char *page)
{
return scnprintf(page, PAGE_SIZE,
- "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
+ "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}
char sessname[NAME_MAX];
enum rnbd_access_mode access_mode = RNBD_ACCESS_RW;
u16 port_nr = RTRS_PORT;
+ u32 nr_poll_queues = 0;
struct sockaddr_storage *addrs;
struct rtrs_addr paths[6];
opt.pathname = pathname;
opt.dest_port = &port_nr;
opt.access_mode = &access_mode;
+ opt.nr_poll_queues = &nr_poll_queues;
addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
if (ret)
goto out;
- pr_info("Mapping device %s on session %s, (access_mode: %s)\n",
+ pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n",
pathname, sessname,
- rnbd_access_mode_str(access_mode));
+ rnbd_access_mode_str(access_mode),
+ nr_poll_queues);
dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname,
- access_mode);
+ access_mode, nr_poll_queues);
if (IS_ERR(dev)) {
ret = PTR_ERR(dev);
goto out;
return ret;
}
+static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx)
+{
+ struct rnbd_queue *q = hctx->driver_data;
+ struct rnbd_clt_dev *dev = q->dev;
+ int cnt;
+
+ cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
+ return cnt;
+}
+
+static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
+{
+ struct rnbd_clt_session *sess = set->driver_data;
+
+ /* shared read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
+ set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+ set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
+ set->map[HCTX_TYPE_READ].queue_offset = 0;
+ blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+ blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+
+ if (sess->nr_poll_queues) {
+ /* dedicated queue for poll */
+ set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
+ set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset +
+ set->map[HCTX_TYPE_READ].nr_queues;
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n",
+ sess->sessname,
+ set->map[HCTX_TYPE_DEFAULT].nr_queues,
+ set->map[HCTX_TYPE_READ].nr_queues,
+ set->map[HCTX_TYPE_POLL].nr_queues);
+ } else {
+ pr_info("[session=%s] mapped %d/%d default/read queues.\n",
+ sess->sessname,
+ set->map[HCTX_TYPE_DEFAULT].nr_queues,
+ set->map[HCTX_TYPE_READ].nr_queues);
+ }
+
+ return 0;
+}
+
static struct blk_mq_ops rnbd_mq_ops = {
.queue_rq = rnbd_queue_rq,
.complete = rnbd_softirq_done_fn,
+ .map_queues = rnbd_rdma_map_queues,
+ .poll = rnbd_rdma_poll,
};
static int setup_mq_tags(struct rnbd_clt_session *sess)
tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_TAG_QUEUE_SHARED;
tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
- tag_set->nr_hw_queues = num_online_cpus();
+
+ /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
+ tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+ /*
+ * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
+ * others are for HCTX_TYPE_POLL
+ */
+ tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
+ tag_set->driver_data = sess;
return blk_mq_alloc_tag_set(tag_set);
}
static struct rnbd_clt_session *
find_and_get_or_create_sess(const char *sessname,
const struct rtrs_addr *paths,
- size_t path_cnt, u16 port_nr)
+ size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rtrs_attrs attrs;
struct rtrs_clt_ops rtrs_ops;
sess = find_or_create_sess(sessname, &first);
+ if (sess == ERR_PTR(-ENOMEM))
+ return ERR_PTR(-ENOMEM);
+ else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
+ /*
+ * A device MUST have its own session to use the polling-mode.
+ * It must fail to map new device with the same session.
+ */
+ err = -EINVAL;
+ goto put_sess;
+ }
+
if (!first)
return sess;
0, /* Do not use pdu of rtrs */
RECONNECT_DELAY, BMAX_SEGMENTS,
BLK_MAX_SEGMENT_SIZE,
- MAX_RECONNECTS);
+ MAX_RECONNECTS, nr_poll_queues);
if (IS_ERR(sess->rtrs)) {
err = PTR_ERR(sess->rtrs);
goto wake_up_and_put;
rtrs_clt_query(sess->rtrs, &attrs);
sess->max_io_size = attrs.max_io_size;
sess->queue_depth = attrs.queue_depth;
+ sess->nr_poll_queues = nr_poll_queues;
err = setup_mq_tags(sess);
if (err)
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
enum rnbd_access_mode access_mode,
- const char *pathname)
+ const char *pathname,
+ u32 nr_poll_queues)
{
struct rnbd_clt_dev *dev;
int ret;
if (!dev)
return ERR_PTR(-ENOMEM);
- dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues),
+ /*
+ * nr_cpu_ids: the number of softirq queues
+ * nr_poll_queues: the number of polling queues
+ */
+ dev->hw_queues = kcalloc(nr_cpu_ids + nr_poll_queues,
+ sizeof(*dev->hw_queues),
GFP_KERNEL);
if (!dev->hw_queues) {
ret = -ENOMEM;
dev->clt_device_id = ret;
dev->sess = sess;
dev->access_mode = access_mode;
+ dev->nr_poll_queues = nr_poll_queues;
mutex_init(&dev->lock);
refcount_set(&dev->refcount, 1);
dev->dev_state = DEV_STATE_INIT;
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
- enum rnbd_access_mode access_mode)
+ enum rnbd_access_mode access_mode,
+ u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rnbd_clt_dev *dev;
if (unlikely(exists_devpath(pathname, sessname)))
return ERR_PTR(-EEXIST);
- sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr);
+ sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
if (IS_ERR(sess))
return ERR_CAST(sess);
- dev = init_dev(sess, access_mode, pathname);
+ dev = init_dev(sess, access_mode, pathname, nr_poll_queues);
if (IS_ERR(dev)) {
pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n",
pathname, sess->sessname, PTR_ERR(dev));
int queue_depth;
u32 max_io_size;
struct blk_mq_tag_set tag_set;
+ u32 nr_poll_queues;
struct mutex lock; /* protects state and devs_list */
struct list_head devs_list; /* list of struct rnbd_clt_dev */
refcount_t refcount;
enum rnbd_clt_dev_state dev_state;
char *pathname;
enum rnbd_access_mode access_mode;
+ u32 nr_poll_queues;
bool read_only;
bool rotational;
bool wc;
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
- enum rnbd_access_mode access_mode);
+ enum rnbd_access_mode access_mode,
+ u32 nr_poll_queues);
int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
const struct attribute *sysfs_self);
int id = 0;
if (likely(permit->con_type == RTRS_IO_CON))
- id = (permit->cpu_id % (sess->s.con_num - 1)) + 1;
+ id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
return to_clt_con(sess->s.con[id]);
}
static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
const struct rtrs_addr *path,
size_t con_num, u16 max_segments,
- size_t max_segment_size)
+ size_t max_segment_size, u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess;
int err = -ENOMEM;
int cpu;
+ size_t total_con;
sess = kzalloc(sizeof(*sess), GFP_KERNEL);
if (!sess)
goto err;
- /* Extra connection for user messages */
- con_num += 1;
-
- sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
+ /*
+ * irqmode and poll
+ * +1: Extra connection for user messages
+ */
+ total_con = con_num + nr_poll_queues + 1;
+ sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
if (!sess->s.con)
goto err_free_sess;
+ sess->s.con_num = total_con;
+ sess->s.irq_con_num = con_num + 1;
+
sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
if (!sess->stats)
goto err_free_con;
memcpy(&sess->s.src_addr, path->src,
rdma_addr_size((struct sockaddr *)path->src));
strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
- sess->s.con_num = con_num;
sess->clt = clt;
sess->max_pages_per_mr = max_segments * max_segment_size >> 12;
init_waitqueue_head(&sess->state_wq);
}
cq_size = max_send_wr + max_recv_wr;
cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
- err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
- cq_vector, cq_size, max_send_wr,
- max_recv_wr, IB_POLL_SOFTIRQ);
+ if (con->c.cid >= sess->s.irq_con_num)
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_DIRECT);
+ else
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_SOFTIRQ);
/*
* In case of error we do not bother to clean previous allocations,
* since destroy_con_cq_qp() must be called.
* @max_segment_size: Max. size of one segment
* @max_reconnect_attempts: Number of times to reconnect on error before giving
* up, 0 for * disabled, -1 for forever
+ * @nr_poll_queues: number of polling mode connection using IB_POLL_DIRECT flag
*
* Starts session establishment with the rtrs_server. The function can block
* up to ~2000ms before it returns.
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
size_t max_segment_size,
- s16 max_reconnect_attempts)
+ s16 max_reconnect_attempts, u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess, *tmp;
struct rtrs_clt *clt;
struct rtrs_clt_sess *sess;
sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
- max_segments, max_segment_size);
+ max_segments, max_segment_size, nr_poll_queues);
if (IS_ERR(sess)) {
err = PTR_ERR(sess);
goto close_all_sess;
}
EXPORT_SYMBOL(rtrs_clt_request);
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
+{
+ int cnt;
+ struct rtrs_con *con;
+ struct rtrs_clt_sess *sess;
+ struct path_it it;
+
+ rcu_read_lock();
+ for (path_it_init(&it, clt);
+ (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+ if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
+ continue;
+
+ con = sess->s.con[index + 1];
+ cnt = ib_process_cq_direct(con->cq, -1);
+ if (cnt)
+ break;
+ }
+ path_it_deinit(&it);
+ rcu_read_unlock();
+
+ return cnt;
+}
+EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct);
+
/**
* rtrs_clt_query() - queries RTRS session attributes
*@clt: session pointer
int err;
sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments,
- clt->max_segment_size);
+ clt->max_segment_size, 0);
if (IS_ERR(sess))
return PTR_ERR(sess);
uuid_t uuid;
struct rtrs_con **con;
unsigned int con_num;
+ unsigned int irq_con_num;
unsigned int recon_cnt;
struct rtrs_ib_dev *dev;
int dev_ref;
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
size_t max_segment_size,
- s16 max_reconnect_attempts);
+ s16 max_reconnect_attempts, u32 nr_poll_queues);
void rtrs_clt_close(struct rtrs_clt *sess);
struct rtrs_clt *sess, struct rtrs_permit *permit,
const struct kvec *vec, size_t nr, size_t len,
struct scatterlist *sg, unsigned int sg_cnt);
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index);
/**
* rtrs_attrs - RTRS session attributes