From: Sagi Grimberg Date: Tue, 25 Feb 2020 23:53:09 +0000 (-0800) Subject: nvme-tcp: optimize queue io_cpu assignment for multiple queue maps X-Git-Tag: v5.10.7~3002^2~3^2~23 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=40510a639ec08db81d5ff9c79856baf9dda94748;p=platform%2Fkernel%2Flinux-rpi.git nvme-tcp: optimize queue io_cpu assignment for multiple queue maps Currently, queue io_cpu assignment is done sequentially for default, read and poll queues based on queue id. This causes misalignment between the context of the CPU initiating I/O and the I/O worker thread processing queued requests or completions. Modify the queue io_cpu assignment to take the queue map offsets into account. Each queue io_cpu will start at zero for each queue map. This essentially aligns read/poll queues to start over the same range as default queues. Testing performed by Mark with: - ram device (nvmet) - single CPU core (pinned) - 100% 4k reads - engine io_uring (not using sq_thread option) - hipri flag set Micro-benchmark results show a net gain of: - increase of 18%-29% in IOPs - reduction of 16%-22% in average latency - reduction of 7%-23% in 99.99% latency Baseline: ======== QDepth/Batch | IOPs [k] | Avg. Lat [us] | 99.99% Lat [us] ----------------------------------------------------------------- 1/1 | 32.4 | 30.11 | 50.94 32/8 | 179 | 168.20 | 371 CPU alignment: ============= QDepth/Batch | IOPs [k] | Avg. 
Lat [us] | 99.99% Lat [us] ----------------------------------------------------------------- 1/1 | 38.5 | 25.18 | 39.16 32/8 | 231 | 130.75 | 343 Reported-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e384239..11a7c26 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1258,13 +1258,67 @@ free_icreq: return ret; } +static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue) +{ + return nvme_tcp_queue_id(queue) == 0; +} + +static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT]; +} + +static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ]; +} + +static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + !nvme_tcp_read_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ] + + ctrl->io_queues[HCTX_TYPE_POLL]; +} + +static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + int n = 0; + + if (nvme_tcp_default_queue(queue)) + n = qid - 1; + else if (nvme_tcp_read_queue(queue)) + n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; + else if (nvme_tcp_poll_queue(queue)) + n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - + ctrl->io_queues[HCTX_TYPE_READ] - 1; + queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, 
-1, false); +} + static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, size_t queue_size) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; struct linger sol = { .l_onoff = 1, .l_linger = 0 }; - int ret, opt, rcv_pdu_size, n; + int ret, opt, rcv_pdu_size; queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); @@ -1343,11 +1397,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, } queue->sock->sk->sk_allocation = GFP_ATOMIC; - if (!qid) - n = 0; - else - n = (qid - 1) % num_online_cpus(); - queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + nvme_tcp_set_queue_io_cpu(queue); queue->request = NULL; queue->data_remaining = 0; queue->ddgst_remaining = 0;