Merge branch 'for-linus' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Mar 2017 18:53:35 +0000 (10:53 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Mar 2017 18:53:35 +0000 (10:53 -0800)

diff --git a/block/blk-core.c b/block/blk-core.c

index b9e857f..1086dac 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -578,7 +578,6 @@ void blk_cleanup_queue(struct request_queue *q)
                 q->queue_lock = &q->__queue_lock;
         spin_unlock_irq(lock);
  
-       bdi_unregister(q->backing_dev_info);
         put_disk_devt(q->disk_devt);
  
         /* @q is and will stay empty, shutdown and put */
diff --git a/block/blk-ioc.c b/block/blk-ioc.c

index 6bfa396..63898d2 100644 (file)
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -37,8 +37,8 @@ static void icq_free_icq_rcu(struct rcu_head *head)
  }
  
  /*
- * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for
- * mq.
+ * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
+ * and queue locked for legacy.
   */
  static void ioc_exit_icq(struct io_cq *icq)
  {
@@ -55,7 +55,10 @@ static void ioc_exit_icq(struct io_cq *icq)
         icq->flags |= ICQ_EXITED;
  }
  
-/* Release an icq.  Called with both ioc and q locked. */
+/*
+ * Release an icq. Called with ioc locked for blk-mq, and with both ioc
+ * and queue locked for legacy.
+ */
  static void ioc_destroy_icq(struct io_cq *icq)
  {
         struct io_context *ioc = icq->ioc;
@@ -63,7 +66,6 @@ static void ioc_destroy_icq(struct io_cq *icq)
         struct elevator_type *et = q->elevator->type;
  
         lockdep_assert_held(&ioc->lock);
-       lockdep_assert_held(q->queue_lock);
  
         radix_tree_delete(&ioc->icq_tree, icq->q->id);
         hlist_del_init(&icq->ioc_node);
@@ -223,24 +225,40 @@ void exit_io_context(struct task_struct *task)
         put_io_context_active(ioc);
  }
  
+static void __ioc_clear_queue(struct list_head *icq_list)
+{
+       unsigned long flags;
+
+       while (!list_empty(icq_list)) {
+               struct io_cq *icq = list_entry(icq_list->next,
+                                              struct io_cq, q_node);
+               struct io_context *ioc = icq->ioc;
+
+               spin_lock_irqsave(&ioc->lock, flags);
+               ioc_destroy_icq(icq);
+               spin_unlock_irqrestore(&ioc->lock, flags);
+       }
+}
+
  /**
   * ioc_clear_queue - break any ioc association with the specified queue
   * @q: request_queue being cleared
   *
- * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
+ * Walk @q->icq_list and exit all io_cq's.
   */
  void ioc_clear_queue(struct request_queue *q)
  {
-       lockdep_assert_held(q->queue_lock);
+       LIST_HEAD(icq_list);
  
-       while (!list_empty(&q->icq_list)) {
-               struct io_cq *icq = list_entry(q->icq_list.next,
-                                              struct io_cq, q_node);
-               struct io_context *ioc = icq->ioc;
+       spin_lock_irq(q->queue_lock);
+       list_splice_init(&q->icq_list, &icq_list);
  
-               spin_lock(&ioc->lock);
-               ioc_destroy_icq(icq);
-               spin_unlock(&ioc->lock);
+       if (q->mq_ops) {
+               spin_unlock_irq(q->queue_lock);
+               __ioc_clear_queue(&icq_list);
+       } else {
+               __ioc_clear_queue(&icq_list);
+               spin_unlock_irq(q->queue_lock);
         }
  }
  
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c

index 98c7b06..09af8ff 100644 (file)
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -110,15 +110,14 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
                                          struct blk_mq_alloc_data *data)
  {
         struct elevator_queue *e = q->elevator;
-       struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *ctx;
         struct request *rq;
  
         blk_queue_enter_live(q);
-       ctx = blk_mq_get_ctx(q);
-       hctx = blk_mq_map_queue(q, ctx->cpu);
-
-       blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
+       data->q = q;
+       if (likely(!data->ctx))
+               data->ctx = blk_mq_get_ctx(q);
+       if (likely(!data->hctx))
+               data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
  
         if (e) {
                 data->flags |= BLK_MQ_REQ_INTERNAL;
@@ -135,8 +134,6 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
                         rq = __blk_mq_alloc_request(data, op);
         } else {
                 rq = __blk_mq_alloc_request(data, op);
-               if (rq)
-                       data->hctx->tags->rqs[rq->tag] = rq;
         }
  
         if (rq) {
@@ -454,7 +451,8 @@ int blk_mq_sched_setup(struct request_queue *q)
          */
         ret = 0;
         queue_for_each_hw_ctx(q, hctx, i) {
-               hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
+               hctx->sched_tags = blk_mq_alloc_rq_map(set, i,
+                               q->nr_requests, set->reserved_tags);
                 if (!hctx->sched_tags) {
                         ret = -ENOMEM;
                         break;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c

index 54c8436..e48bc2c 100644 (file)
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -181,7 +181,7 @@ found_tag:
  void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
                     struct blk_mq_ctx *ctx, unsigned int tag)
  {
-       if (tag >= tags->nr_reserved_tags) {
+       if (!blk_mq_tag_is_reserved(tags, tag)) {
                 const int real_tag = tag - tags->nr_reserved_tags;
  
                 BUG_ON(real_tag >= tags->nr_tags);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h

index 6349742..5cb51e5 100644 (file)
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -85,4 +85,10 @@ static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
         hctx->tags->rqs[tag] = rq;
  }
  
+static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
+                                         unsigned int tag)
+{
+       return tag < tags->nr_reserved_tags;
+}
+
  #endif
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 6f35b6f..b2fd175 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -77,10 +77,20 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
  }
  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
  
-static void blk_mq_freeze_queue_wait(struct request_queue *q)
+void blk_mq_freeze_queue_wait(struct request_queue *q)
  {
         wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);
+
+int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
+                                    unsigned long timeout)
+{
+       return wait_event_timeout(q->mq_freeze_wq,
+                                       percpu_ref_is_zero(&q->q_usage_counter),
+                                       timeout);
+}
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
  
  /*
   * Guarantee no request is in use, so we can change any data structure of
@@ -236,6 +246,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
                         }
                         rq->tag = tag;
                         rq->internal_tag = -1;
+                       data->hctx->tags->rqs[rq->tag] = rq;
                 }
  
                 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
@@ -275,10 +286,9 @@ EXPORT_SYMBOL(blk_mq_alloc_request);
  struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
                 unsigned int flags, unsigned int hctx_idx)
  {
-       struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *ctx;
+       struct blk_mq_alloc_data alloc_data = { .flags = flags };
         struct request *rq;
-       struct blk_mq_alloc_data alloc_data;
+       unsigned int cpu;
         int ret;
  
         /*
@@ -301,25 +311,23 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
          * Check if the hardware context is actually mapped to anything.
          * If not tell the caller that it should skip this queue.
          */
-       hctx = q->queue_hw_ctx[hctx_idx];
-       if (!blk_mq_hw_queue_mapped(hctx)) {
-               ret = -EXDEV;
-               goto out_queue_exit;
+       alloc_data.hctx = q->queue_hw_ctx[hctx_idx];
+       if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) {
+               blk_queue_exit(q);
+               return ERR_PTR(-EXDEV);
         }
-       ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+       cpu = cpumask_first(alloc_data.hctx->cpumask);
+       alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
  
-       blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
-       rq = __blk_mq_alloc_request(&alloc_data, rw);
-       if (!rq) {
-               ret = -EWOULDBLOCK;
-               goto out_queue_exit;
-       }
-
-       return rq;
+       rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
  
-out_queue_exit:
+       blk_mq_put_ctx(alloc_data.ctx);
         blk_queue_exit(q);
-       return ERR_PTR(ret);
+
+       if (!rq)
+               return ERR_PTR(-EWOULDBLOCK);
+
+       return rq;
  }
  EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
  
@@ -854,6 +862,9 @@ done:
                 return true;
         }
  
+       if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
+               data.flags |= BLK_MQ_REQ_RESERVED;
+
         rq->tag = blk_mq_get_tag(&data);
         if (rq->tag >= 0) {
                 if (blk_mq_tag_busy(data.hctx)) {
@@ -867,12 +878,9 @@ done:
         return false;
  }
  
-static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
-                                 struct request *rq)
+static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
+                                   struct request *rq)
  {
-       if (rq->tag == -1 || rq->internal_tag == -1)
-               return;
-
         blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
         rq->tag = -1;
  
@@ -882,6 +890,26 @@ static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
         }
  }
  
+static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
+                                      struct request *rq)
+{
+       if (rq->tag == -1 || rq->internal_tag == -1)
+               return;
+
+       __blk_mq_put_driver_tag(hctx, rq);
+}
+
+static void blk_mq_put_driver_tag(struct request *rq)
+{
+       struct blk_mq_hw_ctx *hctx;
+
+       if (rq->tag == -1 || rq->internal_tag == -1)
+               return;
+
+       hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+       __blk_mq_put_driver_tag(hctx, rq);
+}
+
  /*
   * If we fail getting a driver tag because all the driver tags are already
   * assigned and on the dispatch list, BUT the first entry does not have a
@@ -991,7 +1019,19 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
  
                 bd.rq = rq;
                 bd.list = dptr;
-               bd.last = list_empty(list);
+
+               /*
+                * Flag last if we have no more requests, or if we have more
+                * but can't assign a driver tag to it.
+                */
+               if (list_empty(list))
+                       bd.last = true;
+               else {
+                       struct request *nxt;
+
+                       nxt = list_first_entry(list, struct request, queuelist);
+                       bd.last = !blk_mq_get_driver_tag(nxt, NULL, false);
+               }
  
                 ret = q->mq_ops->queue_rq(hctx, &bd);
                 switch (ret) {
@@ -999,7 +1039,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
                         queued++;
                         break;
                 case BLK_MQ_RQ_QUEUE_BUSY:
-                       blk_mq_put_driver_tag(hctx, rq);
+                       blk_mq_put_driver_tag_hctx(hctx, rq);
                         list_add(&rq->queuelist, list);
                         __blk_mq_requeue_request(rq);
                         break;
@@ -1029,6 +1069,13 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
          * that is where we will continue on next queue run.
          */
         if (!list_empty(list)) {
+               /*
+                * If we got a driver tag for the next request already,
+                * free it again.
+                */
+               rq = list_first_entry(list, struct request, queuelist);
+               blk_mq_put_driver_tag(rq);
+
                 spin_lock(&hctx->lock);
                 list_splice_init(list, &hctx->dispatch);
                 spin_unlock(&hctx->lock);
@@ -1715,16 +1762,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
                                         unsigned int reserved_tags)
  {
         struct blk_mq_tags *tags;
+       int node;
  
-       tags = blk_mq_init_tags(nr_tags, reserved_tags,
-                               set->numa_node,
+       node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+       if (node == NUMA_NO_NODE)
+               node = set->numa_node;
+
+       tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
                                 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
         if (!tags)
                 return NULL;
  
         tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
                                  GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                                set->numa_node);
+                                node);
         if (!tags->rqs) {
                 blk_mq_free_tags(tags);
                 return NULL;
@@ -1732,7 +1783,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
  
         tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
                                  GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                                set->numa_node);
+                                node);
         if (!tags->static_rqs) {
                 kfree(tags->rqs);
                 blk_mq_free_tags(tags);
@@ -1752,6 +1803,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  {
         unsigned int i, j, entries_per_page, max_order = 4;
         size_t rq_size, left;
+       int node;
+
+       node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+       if (node == NUMA_NO_NODE)
+               node = set->numa_node;
  
         INIT_LIST_HEAD(&tags->page_list);
  
@@ -1773,7 +1829,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                         this_order--;
  
                 do {
-                       page = alloc_pages_node(set->numa_node,
+                       page = alloc_pages_node(node,
                                 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
                                 this_order);
                         if (page)
@@ -1806,7 +1862,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                         if (set->ops->init_request) {
                                 if (set->ops->init_request(set->driver_data,
                                                 rq, hctx_idx, i,
-                                               set->numa_node)) {
+                                               node)) {
                                         tags->static_rqs[i] = NULL;
                                         goto fail;
                                 }
diff --git a/block/blk-mq.h b/block/blk-mq.h

index 24b2256..088ced0 100644 (file)
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -146,16 +146,6 @@ struct blk_mq_alloc_data {
         struct blk_mq_hw_ctx *hctx;
  };
  
-static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
-               struct request_queue *q, unsigned int flags,
-               struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
-{
-       data->q = q;
-       data->flags = flags;
-       data->ctx = ctx;
-       data->hctx = hctx;
-}
-
  static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
  {
         if (data->flags & BLK_MQ_REQ_INTERNAL)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c

index 002af83..c44b321 100644 (file)
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -815,9 +815,7 @@ static void blk_release_queue(struct kobject *kobj)
         blkcg_exit_queue(q);
  
         if (q->elevator) {
-               spin_lock_irq(q->queue_lock);
                 ioc_clear_queue(q);
-               spin_unlock_irq(q->queue_lock);
                 elevator_exit(q->elevator);
         }
  
diff --git a/block/elevator.c b/block/elevator.c

index ac1c9f4..01139f5 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -983,9 +983,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
                 if (old_registered)
                         elv_unregister_queue(q);
  
-               spin_lock_irq(q->queue_lock);
                 ioc_clear_queue(q);
-               spin_unlock_irq(q->queue_lock);
         }
  
         /* allocate, init and register new elevator */
diff --git a/block/genhd.c b/block/genhd.c

index 2f444b8..b26a5ea 100644 (file)
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -681,6 +681,11 @@ void del_gendisk(struct gendisk *disk)
         disk->flags &= ~GENHD_FL_UP;
  
         sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
+       /*
+        * Unregister bdi before releasing device numbers (as they can get
+        * reused and we'd get clashes in sysfs).
+        */
+       bdi_unregister(disk->queue->backing_dev_info);
         blk_unregister_queue(disk);
         blk_unregister_region(disk_devt(disk), disk->minors);
  
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index eeb1db7..0712365 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1142,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
              (info->lo_flags & LO_FLAGS_AUTOCLEAR))
                 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
  
-       if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
-            !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
-               lo->lo_flags |= LO_FLAGS_PARTSCAN;
-               lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
-               loop_reread_partitions(lo, lo->lo_device);
-       }
-
         lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
         lo->lo_init[0] = info->lo_init[0];
         lo->lo_init[1] = info->lo_init[1];
@@ -1163,6 +1156,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
  
   exit:
         blk_mq_unfreeze_queue(lo->lo_queue);
+
+       if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
+            !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
+               lo->lo_flags |= LO_FLAGS_PARTSCAN;
+               lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+               loop_reread_partitions(lo, lo->lo_device);
+       }
+
         return err;
  }
  
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c

index 1541cb8..7e4287b 100644 (file)
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -675,8 +675,10 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
             nbd->num_connections) {
                 int i;
  
-               for (i = 0; i < nbd->num_connections; i++)
+               for (i = 0; i < nbd->num_connections; i++) {
+                       sockfd_put(nbd->socks[i]->sock);
                         kfree(nbd->socks[i]);
+               }
                 kfree(nbd->socks);
                 nbd->socks = NULL;
                 nbd->num_connections = 0;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c

index 25ec4e5..9b3b57f 100644 (file)
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2344,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
  }
  EXPORT_SYMBOL_GPL(nvme_kill_queues);
  
+void nvme_unfreeze(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               blk_mq_unfreeze_queue(ns->queue);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_unfreeze);
+
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
+               if (timeout <= 0)
+                       break;
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
+
+void nvme_wait_freeze(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               blk_mq_freeze_queue_wait(ns->queue);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze);
+
+void nvme_start_freeze(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               blk_mq_freeze_queue_start(ns->queue);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_start_freeze);
+
  void nvme_stop_queues(struct nvme_ctrl *ctrl)
  {
         struct nvme_ns *ns;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h

index a3da1e9..2aa20e3 100644 (file)
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -294,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl);
  void nvme_stop_queues(struct nvme_ctrl *ctrl);
  void nvme_start_queues(struct nvme_ctrl *ctrl);
  void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_unfreeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
+void nvme_start_freeze(struct nvme_ctrl *ctrl);
  
  #define NVME_QID_ANY -1
  struct request *nvme_alloc_request(struct request_queue *q,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index 57a1af5..26a5fd0 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1038,9 +1038,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
  }
  
  static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-                                                       int depth)
+                                                       int depth, int node)
  {
-       struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+       struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+                                                       node);
         if (!nvmeq)
                 return NULL;
  
@@ -1217,7 +1218,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
  
         nvmeq = dev->queues[0];
         if (!nvmeq) {
-               nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+               nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+                                       dev_to_node(dev->dev));
                 if (!nvmeq)
                         return -ENOMEM;
         }
@@ -1309,7 +1311,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
         int ret = 0;
  
         for (i = dev->queue_count; i <= dev->max_qid; i++) {
-               if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+               /* vector == qid - 1, match nvme_create_queue */
+               if (!nvme_alloc_queue(dev, i, dev->q_depth,
+                    pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
                         ret = -ENOMEM;
                         break;
                 }
@@ -1671,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev)
  static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
  {
         int i, queues;
-       u32 csts = -1;
+       bool dead = true;
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
  
         del_timer_sync(&dev->watchdog_timer);
  
         mutex_lock(&dev->shutdown_lock);
-       if (pci_is_enabled(to_pci_dev(dev->dev))) {
-               nvme_stop_queues(&dev->ctrl);
-               csts = readl(dev->bar + NVME_REG_CSTS);
+       if (pci_is_enabled(pdev)) {
+               u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+               if (dev->ctrl.state == NVME_CTRL_LIVE)
+                       nvme_start_freeze(&dev->ctrl);
+               dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+                       pdev->error_state  != pci_channel_io_normal);
         }
  
+       /*
+        * Give the controller a chance to complete all entered requests if
+        * doing a safe shutdown.
+        */
+       if (!dead && shutdown)
+               nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+       nvme_stop_queues(&dev->ctrl);
+
         queues = dev->online_queues - 1;
         for (i = dev->queue_count - 1; i > 0; i--)
                 nvme_suspend_queue(dev->queues[i]);
  
-       if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+       if (dead) {
                 /* A device might become IO incapable very soon during
                  * probe, before the admin queue is configured. Thus,
                  * queue_count can be 0 here.
@@ -1700,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
  
         blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
         blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+       /*
+        * The driver will not be starting up queues again if shutting down so
+        * must flush all entered requests to their failed completion to avoid
+        * deadlocking blk-mq hot-cpu notifier.
+        */
+       if (shutdown)
+               nvme_start_queues(&dev->ctrl);
         mutex_unlock(&dev->shutdown_lock);
  }
  
@@ -1822,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work)
                 nvme_remove_namespaces(&dev->ctrl);
         } else {
                 nvme_start_queues(&dev->ctrl);
+               nvme_wait_freeze(&dev->ctrl);
                 nvme_dev_add(dev);
+               nvme_unfreeze(&dev->ctrl);
         }
  
         if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c

index 980eaf5..d571bc3 100644 (file)
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1298,6 +1298,22 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
  }
  EXPORT_SYMBOL(pci_irq_get_affinity);
  
+/**
+ * pci_irq_get_node - return the numa node of a particular msi vector
+ * @pdev:      PCI device to operate on
+ * @vec:       device-relative interrupt vector index (0-based).
+ */
+int pci_irq_get_node(struct pci_dev *pdev, int vec)
+{
+       const struct cpumask *mask;
+
+       mask = pci_irq_get_affinity(pdev, vec);
+       if (mask)
+               return local_memory_node(cpu_to_node(cpumask_first(mask)));
+       return dev_to_node(&pdev->dev);
+}
+EXPORT_SYMBOL(pci_irq_get_node);
+
  struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
  {
         return to_pci_dev(desc->dev);
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 77c30f1..2eca00e 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -870,6 +870,7 @@ static void init_once(void *foo)
  #ifdef CONFIG_SYSFS
         INIT_LIST_HEAD(&bdev->bd_holder_disks);
  #endif
+       bdev->bd_bdi = &noop_backing_dev_info;
         inode_init_once(&ei->vfs_inode);
         /* Initialize mutex for freeze. */
         mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -884,8 +885,10 @@ static void bdev_evict_inode(struct inode *inode)
         spin_lock(&bdev_lock);
         list_del_init(&bdev->bd_list);
         spin_unlock(&bdev_lock);
-       if (bdev->bd_bdi != &noop_backing_dev_info)
+       if (bdev->bd_bdi != &noop_backing_dev_info) {
                 bdi_put(bdev->bd_bdi);
+               bdev->bd_bdi = &noop_backing_dev_info;
+       }
  }
  
  static const struct super_operations bdev_sops = {
@@ -988,7 +991,6 @@ struct block_device *bdget(dev_t dev)
                 bdev->bd_contains = NULL;
                 bdev->bd_super = NULL;
                 bdev->bd_inode = inode;
-               bdev->bd_bdi = &noop_backing_dev_info;
                 bdev->bd_block_size = i_blocksize(inode);
                 bdev->bd_part_count = 0;
                 bdev->bd_invalidated = 0;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h

index 001d30d..b296a90 100644 (file)
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -245,6 +245,9 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
  void blk_mq_freeze_queue(struct request_queue *q);
  void blk_mq_unfreeze_queue(struct request_queue *q);
  void blk_mq_freeze_queue_start(struct request_queue *q);
+void blk_mq_freeze_queue_wait(struct request_queue *q);
+int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
+                                    unsigned long timeout);
  int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
  
  int blk_mq_map_queues(struct blk_mq_tag_set *set);
diff --git a/include/linux/pci.h b/include/linux/pci.h

index 282ed32..eb3da1a 100644 (file)
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1323,6 +1323,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
  void pci_free_irq_vectors(struct pci_dev *dev);
  int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
  const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
+int pci_irq_get_node(struct pci_dev *pdev, int vec);
  
  #else
  static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1370,6 +1371,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
  {
         return cpu_possible_mask;
  }
+
+static inline int pci_irq_get_node(struct pci_dev *pdev, int vec)
+{
+       return first_online_node;
+}
  #endif
  
  static inline int
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 3 Mar 2017 18:53:35 +0000 (10:53 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 3 Mar 2017 18:53:35 +0000 (10:53 -0800)
block/blk-core.c		patch \| blob \| history
block/blk-ioc.c		patch \| blob \| history
block/blk-mq-sched.c		patch \| blob \| history
block/blk-mq-tag.c		patch \| blob \| history
block/blk-mq-tag.h		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
block/blk-mq.h		patch \| blob \| history
block/blk-sysfs.c		patch \| blob \| history
block/elevator.c		patch \| blob \| history
block/genhd.c		patch \| blob \| history
drivers/block/loop.c		patch \| blob \| history
drivers/block/nbd.c		patch \| blob \| history
drivers/nvme/host/core.c		patch \| blob \| history
drivers/nvme/host/nvme.h		patch \| blob \| history
drivers/nvme/host/pci.c		patch \| blob \| history
drivers/pci/msi.c		patch \| blob \| history
fs/block_dev.c		patch \| blob \| history
include/linux/blk-mq.h		patch \| blob \| history
include/linux/pci.h		patch \| blob \| history