blk-mq: avoid sysfs buffer overflow with too many CPU cores
[platform/kernel/linux-rpi.git] / block / blk-mq.c
index 85a1c1a..684acaa 100644 (file)
@@ -701,12 +701,20 @@ static void blk_mq_requeue_work(struct work_struct *work)
        spin_unlock_irq(&q->requeue_lock);
 
        list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-               if (!(rq->rq_flags & RQF_SOFTBARRIER))
+               if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
                        continue;
 
                rq->rq_flags &= ~RQF_SOFTBARRIER;
                list_del_init(&rq->queuelist);
-               blk_mq_sched_insert_request(rq, true, false, false);
+               /*
+                * If RQF_DONTPREP, rq has contained some driver specific
+                * data, so insert it to hctx dispatch list to avoid any
+                * merge.
+                */
+               if (rq->rq_flags & RQF_DONTPREP)
+                       blk_mq_request_bypass_insert(rq, false);
+               else
+                       blk_mq_sched_insert_request(rq, true, false, false);
        }
 
        while (!list_empty(&rq_list)) {
@@ -836,7 +844,10 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
         */
        if (blk_mq_req_expired(rq, next))
                blk_mq_rq_timed_out(rq, reserved);
-       if (refcount_dec_and_test(&rq->ref))
+
+       if (is_flush_rq(rq, hctx))
+               rq->end_io(rq, 0);
+       else if (refcount_dec_and_test(&rq->ref))
                __blk_mq_free_request(rq);
 }
 
@@ -1628,7 +1639,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                BUG_ON(!rq->q);
                if (rq->mq_ctx != this_ctx) {
                        if (this_ctx) {
-                               trace_block_unplug(this_q, depth, from_schedule);
+                               trace_block_unplug(this_q, depth, !from_schedule);
                                blk_mq_sched_insert_requests(this_q, this_ctx,
                                                                &ctx_list,
                                                                from_schedule);
@@ -1648,7 +1659,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
         * on 'ctx_list'. Do those.
         */
        if (this_ctx) {
-               trace_block_unplug(this_q, depth, from_schedule);
+               trace_block_unplug(this_q, depth, !from_schedule);
                blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
                                                from_schedule);
        }
@@ -1747,7 +1758,7 @@ insert:
        if (bypass_insert)
                return BLK_STS_RESOURCE;
 
-       blk_mq_sched_insert_request(rq, false, run_queue, false);
+       blk_mq_request_bypass_insert(rq, run_queue);
        return BLK_STS_OK;
 }
 
@@ -1763,7 +1774,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
        ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
        if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
-               blk_mq_sched_insert_request(rq, false, true, false);
+               blk_mq_request_bypass_insert(rq, true);
        else if (ret != BLK_STS_OK)
                blk_mq_end_request(rq, ret);
 
@@ -1798,7 +1809,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
                if (ret != BLK_STS_OK) {
                        if (ret == BLK_STS_RESOURCE ||
                                        ret == BLK_STS_DEV_RESOURCE) {
-                               list_add(&rq->queuelist, list);
+                               blk_mq_request_bypass_insert(rq,
+                                                       list_empty(list));
                                break;
                        }
                        blk_mq_end_request(rq, ret);
@@ -2148,12 +2160,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);
 
-       if (hctx->flags & BLK_MQ_F_BLOCKING)
-               cleanup_srcu_struct(hctx->srcu);
-
        blk_mq_remove_cpuhp(hctx);
-       blk_free_flush_queue(hctx->fq);
-       sbitmap_free(&hctx->ctx_map);
 }
 
 static void blk_mq_exit_hw_queues(struct request_queue *q,
@@ -2194,12 +2201,12 @@ static int blk_mq_init_hctx(struct request_queue *q,
         * runtime
         */
        hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
-                                       GFP_KERNEL, node);
+                       GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node);
        if (!hctx->ctxs)
                goto unregister_cpu_notifier;
 
-       if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), GFP_KERNEL,
-                             node))
+       if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
+                               GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node))
                goto free_ctxs;
 
        hctx->nr_ctx = 0;
@@ -2212,7 +2219,8 @@ static int blk_mq_init_hctx(struct request_queue *q,
            set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
                goto free_bitmap;
 
-       hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
+       hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
+                       GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
        if (!hctx->fq)
                goto exit_hctx;
 
@@ -2227,7 +2235,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
        return 0;
 
  free_fq:
-       kfree(hctx->fq);
+       blk_free_flush_queue(hctx->fq);
  exit_hctx:
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);
@@ -2524,12 +2532,14 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
                node = blk_mq_hw_queue_to_node(q->mq_map, i);
                hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
-                                       GFP_KERNEL, node);
+                               GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
+                               node);
                if (!hctxs[i])
                        break;
 
-               if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
-                                               node)) {
+               if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask,
+                                       GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
+                                       node)) {
                        kfree(hctxs[i]);
                        hctxs[i] = NULL;
                        break;
@@ -2651,7 +2661,8 @@ err_exit:
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
-void blk_mq_free_queue(struct request_queue *q)
+/* tags can _not_ be used after returning from blk_mq_exit_queue */
+void blk_mq_exit_queue(struct request_queue *q)
 {
        struct blk_mq_tag_set   *set = q->tag_set;
 
@@ -2878,6 +2889,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
                }
                if (ret)
                        break;
+               if (q->elevator && q->elevator->type->ops.mq.depth_updated)
+                       q->elevator->type->ops.mq.depth_updated(hctx);
        }
 
        if (!ret)