Merge branch 'for-linus' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Feb 2014 18:45:18 +0000 (10:45 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Feb 2014 18:45:18 +0000 (10:45 -0800)
Pull block IO fixes from Jens Axboe:
 "Second round of updates and fixes for 3.14-rc2.  Most of this stuff
  has been queued up for a while.  The notable exception is the blk-mq
  changes, which are naturally a bit more in flux still.

  The pull request contains:

   - Two bug fixes for the new immutable vecs, causing crashes with raid
     or swap.  From Kent.

   - Various blk-mq tweaks and fixes from Christoph.  A fix for
     integrity bios from Nic.

   - A few bcache fixes from Kent and Darrick Wong.

   - xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas
     Swenson, and Roger Pau Monne.

   - Fix for a vec miscount with integrity vectors from Martin.

   - Minor annotations or fixes from Masanari Iida and Rashika Kheria.

   - Tweak to null_blk to do more normal FIFO processing of requests
     from Shlomo Pongratz.

   - Elevator switching bypass fix from Tejun.

   - Softlockup in blkdev_issue_discard() fix when !CONFIG_PREEMPT from
     me"

* 'for-linus' of git://git.kernel.dk/linux-block: (31 commits)
  block: add cond_resched() to potentially long running ioctl discard loop
  xen-blkback: init persistent_purge_work work_struct
  blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
  blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
  block: Fix cloning of discard/write same bios
  block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show
  blk-mq: rework flush sequencing logic
  null_blk: use blk_complete_request and blk_mq_complete_request
  virtio_blk: use blk_mq_complete_request
  blk-mq: rework I/O completions
  fs: Add prototype declaration to appropriate header file include/linux/bio.h
  fs: Mark function as static in fs/bio-integrity.c
  block/null_blk: Fix completion processing from LIFO to FIFO
  block: Explicitly handle discard/write same segments
  block: Fix nr_vecs for inline integrity vectors
  blk-mq: Add bio_integrity setup to blk_mq_make_request
  blk-mq: initialize sg_reserved_size
  blk-mq: handle dma_drain_size
  blk-mq: divert __blk_put_request for MQ ops
  blk-mq: support at_head inserations for blk_execute_rq
  ...

29 files changed:
block/blk-core.c
block/blk-exec.c
block/blk-flush.c
block/blk-lib.c
block/blk-merge.c
block/blk-mq-tag.c
block/blk-mq.c
block/blk-mq.h
block/blk-sysfs.c
block/blk-timeout.c
block/blk.h
drivers/block/null_blk.c
drivers/block/virtio_blk.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/md/bcache/bcache.h
drivers/md/bcache/bset.c
drivers/md/bcache/btree.c
drivers/md/bcache/request.c
drivers/md/bcache/sysfs.c
fs/bio-integrity.c
fs/bio.c
include/linux/bio.h
include/linux/blk-mq.h
include/linux/blkdev.h
include/xen/interface/io/blkif.h
lib/percpu_ida.c

index c00e0bd..853f927 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
        if (!uninit_q)
                return NULL;
 
+       uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
+       if (!uninit_q->flush_rq)
+               goto out_cleanup_queue;
+
        q = blk_init_allocated_queue(uninit_q, rfn, lock);
        if (!q)
-               blk_cleanup_queue(uninit_q);
-
+               goto out_free_flush_rq;
        return q;
+
+out_free_flush_rq:
+       kfree(uninit_q->flush_rq);
+out_cleanup_queue:
+       blk_cleanup_queue(uninit_q);
+       return NULL;
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
        if (q->mq_ops)
-               return blk_mq_alloc_request(q, rw, gfp_mask, false);
+               return blk_mq_alloc_request(q, rw, gfp_mask);
        else
                return blk_old_get_request(q, rw, gfp_mask);
 }
@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
        if (unlikely(!q))
                return;
 
+       if (q->mq_ops) {
+               blk_mq_free_request(req);
+               return;
+       }
+
        blk_pm_put_request(req);
 
        elv_completed_request(q, req);
index bbfc072..c68613b 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
         * be resued after dying flag is set
         */
        if (q->mq_ops) {
-               blk_mq_insert_request(q, rq, true);
+               blk_mq_insert_request(q, rq, at_head, true);
                return;
        }
 
index 9288aaf..66e2b69 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
        blk_clear_rq_complete(rq);
 }
 
-static void mq_flush_data_run(struct work_struct *work)
+static void mq_flush_run(struct work_struct *work)
 {
        struct request *rq;
 
-       rq = container_of(work, struct request, mq_flush_data);
+       rq = container_of(work, struct request, mq_flush_work);
 
        memset(&rq->csd, 0, sizeof(rq->csd));
        blk_mq_run_request(rq, true, false);
 }
 
-static void blk_mq_flush_data_insert(struct request *rq)
+static bool blk_flush_queue_rq(struct request *rq)
 {
-       INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
-       kblockd_schedule_work(rq->q, &rq->mq_flush_data);
+       if (rq->q->mq_ops) {
+               INIT_WORK(&rq->mq_flush_work, mq_flush_run);
+               kblockd_schedule_work(rq->q, &rq->mq_flush_work);
+               return false;
+       } else {
+               list_add_tail(&rq->queuelist, &rq->q->queue_head);
+               return true;
+       }
 }
 
 /**
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 
        case REQ_FSEQ_DATA:
                list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
-               if (q->mq_ops)
-                       blk_mq_flush_data_insert(rq);
-               else {
-                       list_add(&rq->queuelist, &q->queue_head);
-                       queued = true;
-               }
+               queued = blk_flush_queue_rq(rq);
                break;
 
        case REQ_FSEQ_DONE:
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
        }
 
        kicked = blk_kick_flush(q);
-       /* blk_mq_run_flush will run queue */
-       if (q->mq_ops)
-               return queued;
        return kicked | queued;
 }
 
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
        struct request *rq, *n;
        unsigned long flags = 0;
 
-       if (q->mq_ops) {
-               blk_mq_free_request(flush_rq);
+       if (q->mq_ops)
                spin_lock_irqsave(&q->mq_flush_lock, flags);
-       }
+
        running = &q->flush_queue[q->flush_running_idx];
        BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 
@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
         * kblockd.
         */
        if (queued || q->flush_queue_delayed) {
-               if (!q->mq_ops)
-                       blk_run_queue_async(q);
-               else
-               /*
-                * This can be optimized to only run queues with requests
-                * queued if necessary.
-                */
-                       blk_mq_run_queues(q, true);
+               WARN_ON(q->mq_ops);
+               blk_run_queue_async(q);
        }
        q->flush_queue_delayed = 0;
        if (q->mq_ops)
                spin_unlock_irqrestore(&q->mq_flush_lock, flags);
 }
 
-static void mq_flush_work(struct work_struct *work)
-{
-       struct request_queue *q;
-       struct request *rq;
-
-       q = container_of(work, struct request_queue, mq_flush_work);
-
-       /* We don't need set REQ_FLUSH_SEQ, it's for consistency */
-       rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-               __GFP_WAIT|GFP_ATOMIC, true);
-       rq->cmd_type = REQ_TYPE_FS;
-       rq->end_io = flush_end_io;
-
-       blk_mq_run_request(rq, true, false);
-}
-
-/*
- * We can't directly use q->flush_rq, because it doesn't have tag and is not in
- * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
- * so offload the work to workqueue.
- *
- * Note: we assume a flush request finished in any hardware queue will flush
- * the whole disk cache.
- */
-static void mq_run_flush(struct request_queue *q)
-{
-       kblockd_schedule_work(q, &q->mq_flush_work);
-}
-
 /**
  * blk_kick_flush - consider issuing flush request
  * @q: request_queue being kicked
@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
         * different from running_idx, which means flush is in flight.
         */
        q->flush_pending_idx ^= 1;
+
        if (q->mq_ops) {
-               mq_run_flush(q);
-               return true;
+               struct blk_mq_ctx *ctx = first_rq->mq_ctx;
+               struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+               blk_mq_rq_init(hctx, q->flush_rq);
+               q->flush_rq->mq_ctx = ctx;
+
+               /*
+                * Reuse the tag value from the fist waiting request,
+                * with blk-mq the tag is generated during request
+                * allocation and drivers can rely on it being inside
+                * the range they asked for.
+                */
+               q->flush_rq->tag = first_rq->tag;
+       } else {
+               blk_rq_init(q, q->flush_rq);
        }
 
-       blk_rq_init(q, &q->flush_rq);
-       q->flush_rq.cmd_type = REQ_TYPE_FS;
-       q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
-       q->flush_rq.rq_disk = first_rq->rq_disk;
-       q->flush_rq.end_io = flush_end_io;
+       q->flush_rq->cmd_type = REQ_TYPE_FS;
+       q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+       q->flush_rq->rq_disk = first_rq->rq_disk;
+       q->flush_rq->end_io = flush_end_io;
 
-       list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
-       return true;
+       return blk_flush_queue_rq(q->flush_rq);
 }
 
 static void flush_data_end_io(struct request *rq, int error)
@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
 void blk_mq_init_flush(struct request_queue *q)
 {
        spin_lock_init(&q->mq_flush_lock);
-       INIT_WORK(&q->mq_flush_work, mq_flush_work);
 }
index 2da76c9..97a733c 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
                atomic_inc(&bb.done);
                submit_bio(type, bio);
+
+               /*
+                * We can loop for a long time in here, if someone does
+                * full device discards (like mkfs). Be nice and allow
+                * us to schedule out to avoid softlocking if preempt
+                * is disabled.
+                */
+               cond_resched();
        }
        blk_finish_plug(&plug);
 
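For context, a minimal sketch of the pattern the blk-lib.c hunk above applies: any submission loop that can run for a very long time (a full-device discard from mkfs, for example) should call cond_resched() on each iteration so that !CONFIG_PREEMPT kernels do not trip the soft-lockup watchdog. The names below (submit_one_chunk, submit_many_chunks, nr_chunks) are invented for illustration and are not from the patch.

        #include <linux/kernel.h>
        #include <linux/sched.h>

        /* Stand-in for whatever slow, non-sleeping work the loop body does. */
        static void submit_one_chunk(unsigned int idx)
        {
                /* pretend this builds and submits one bio */
        }

        static void submit_many_chunks(unsigned int nr_chunks)
        {
                unsigned int i;

                for (i = 0; i < nr_chunks; i++) {
                        submit_one_chunk(i);

                        /*
                         * Offer to reschedule between iterations so a
                         * non-preemptible kernel cannot soft-lock on huge
                         * discards.
                         */
                        cond_resched();
                }
        }
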
index 8f8adaa..6c583f9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
        if (!bio)
                return 0;
 
+       /*
+        * This should probably be returning 0, but blk_add_request_payload()
+        * (Christoph!!!!)
+        */
+       if (bio->bi_rw & REQ_DISCARD)
+               return 1;
+
+       if (bio->bi_rw & REQ_WRITE_SAME)
+               return 1;
+
        fbio = bio;
        cluster = blk_queue_cluster(q);
        seg_size = 0;
@@ -161,30 +171,60 @@ new_segment:
        *bvprv = *bvec;
 }
 
-/*
- * map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries
- */
-int blk_rq_map_sg(struct request_queue *q, struct request *rq,
-                 struct scatterlist *sglist)
+static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
+                            struct scatterlist *sglist,
+                            struct scatterlist **sg)
 {
        struct bio_vec bvec, bvprv = { NULL };
-       struct req_iterator iter;
-       struct scatterlist *sg;
+       struct bvec_iter iter;
        int nsegs, cluster;
 
        nsegs = 0;
        cluster = blk_queue_cluster(q);
 
-       /*
-        * for each bio in rq
-        */
-       sg = NULL;
-       rq_for_each_segment(bvec, rq, iter) {
-               __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
-                                    &nsegs, &cluster);
-       } /* segments in rq */
+       if (bio->bi_rw & REQ_DISCARD) {
+               /*
+                * This is a hack - drivers should be neither modifying the
+                * biovec, nor relying on bi_vcnt - but because of
+                * blk_add_request_payload(), a discard bio may or may not have
+                * a payload we need to set up here (thank you Christoph) and
+                * bi_vcnt is really the only way of telling if we need to.
+                */
+
+               if (bio->bi_vcnt)
+                       goto single_segment;
+
+               return 0;
+       }
+
+       if (bio->bi_rw & REQ_WRITE_SAME) {
+single_segment:
+               *sg = sglist;
+               bvec = bio_iovec(bio);
+               sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
+               return 1;
+       }
+
+       for_each_bio(bio)
+               bio_for_each_segment(bvec, bio, iter)
+                       __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
+                                            &nsegs, &cluster);
 
+       return nsegs;
+}
+
+/*
+ * map a request to scatterlist, return number of sg entries setup. Caller
+ * must make sure sg can hold rq->nr_phys_segments entries
+ */
+int blk_rq_map_sg(struct request_queue *q, struct request *rq,
+                 struct scatterlist *sglist)
+{
+       struct scatterlist *sg = NULL;
+       int nsegs = 0;
+
+       if (rq->bio)
+               nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
 
        if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
            (blk_rq_bytes(rq) & q->dma_pad_mask)) {
@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg);
 int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
                   struct scatterlist *sglist)
 {
-       struct bio_vec bvec, bvprv = { NULL };
-       struct scatterlist *sg;
-       int nsegs, cluster;
-       struct bvec_iter iter;
-
-       nsegs = 0;
-       cluster = blk_queue_cluster(q);
-
-       sg = NULL;
-       bio_for_each_segment(bvec, bio, iter) {
-               __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
-                                    &nsegs, &cluster);
-       } /* segments in bio */
+       struct scatterlist *sg = NULL;
+       int nsegs;
+       struct bio *next = bio->bi_next;
+       bio->bi_next = NULL;
 
+       nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
+       bio->bi_next = next;
        if (sg)
                sg_mark_end(sg);
 
index 5d70edc..83ae96c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -184,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 {
        char *orig_page = page;
-       int cpu;
+       unsigned int cpu;
 
        if (!tags)
                return 0;
index 57039fc..1fa9dd1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -226,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
        return rq;
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
-               gfp_t gfp, bool reserved)
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
 {
        struct request *rq;
 
        if (blk_mq_queue_enter(q))
                return NULL;
 
-       rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved);
+       rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
        if (rq)
                blk_mq_put_ctx(rq->mq_ctx);
        return rq;
@@ -258,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
 /*
  * Re-init and set pdu, if we have it
  */
-static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
+void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
        blk_rq_init(hctx->queue, rq);
 
@@ -305,7 +304,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
                bio_endio(bio, error);
 }
 
-void blk_mq_complete_request(struct request *rq, int error)
+void blk_mq_end_io(struct request *rq, int error)
 {
        struct bio *bio = rq->bio;
        unsigned int bytes = 0;
@@ -330,48 +329,55 @@ void blk_mq_complete_request(struct request *rq, int error)
        else
                blk_mq_free_request(rq);
 }
+EXPORT_SYMBOL(blk_mq_end_io);
 
-void __blk_mq_end_io(struct request *rq, int error)
-{
-       if (!blk_mark_rq_complete(rq))
-               blk_mq_complete_request(rq, error);
-}
-
-static void blk_mq_end_io_remote(void *data)
+static void __blk_mq_complete_request_remote(void *data)
 {
        struct request *rq = data;
 
-       __blk_mq_end_io(rq, rq->errors);
+       rq->q->softirq_done_fn(rq);
 }
 
-/*
- * End IO on this request on a multiqueue enabled driver. We'll either do
- * it directly inline, or punt to a local IPI handler on the matching
- * remote CPU.
- */
-void blk_mq_end_io(struct request *rq, int error)
+void __blk_mq_complete_request(struct request *rq)
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
        int cpu;
 
-       if (!ctx->ipi_redirect)
-               return __blk_mq_end_io(rq, error);
+       if (!ctx->ipi_redirect) {
+               rq->q->softirq_done_fn(rq);
+               return;
+       }
 
        cpu = get_cpu();
        if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
-               rq->errors = error;
-               rq->csd.func = blk_mq_end_io_remote;
+               rq->csd.func = __blk_mq_complete_request_remote;
                rq->csd.info = rq;
                rq->csd.flags = 0;
                __smp_call_function_single(ctx->cpu, &rq->csd, 0);
        } else {
-               __blk_mq_end_io(rq, error);
+               rq->q->softirq_done_fn(rq);
        }
        put_cpu();
 }
-EXPORT_SYMBOL(blk_mq_end_io);
 
-static void blk_mq_start_request(struct request *rq)
+/**
+ * blk_mq_complete_request - end I/O on a request
+ * @rq:                the request being processed
+ *
+ * Description:
+ *     Ends all I/O on a request. It does not handle partial completions.
+ *     The actual completion happens out-of-order, through a IPI handler.
+ **/
+void blk_mq_complete_request(struct request *rq)
+{
+       if (unlikely(blk_should_fake_timeout(rq->q)))
+               return;
+       if (!blk_mark_rq_complete(rq))
+               __blk_mq_complete_request(rq);
+}
+EXPORT_SYMBOL(blk_mq_complete_request);
+
+static void blk_mq_start_request(struct request *rq, bool last)
 {
        struct request_queue *q = rq->q;
 
@@ -384,6 +390,25 @@ static void blk_mq_start_request(struct request *rq)
         */
        rq->deadline = jiffies + q->rq_timeout;
        set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+
+       if (q->dma_drain_size && blk_rq_bytes(rq)) {
+               /*
+                * Make sure space for the drain appears.  We know we can do
+                * this because max_hw_segments has been adjusted to be one
+                * fewer than the device can handle.
+                */
+               rq->nr_phys_segments++;
+       }
+
+       /*
+        * Flag the last request in the series so that drivers know when IO
+        * should be kicked off, if they don't do it on a per-request basis.
+        *
+        * Note: the flag isn't the only condition drivers should do kick off.
+        * If drive is busy, the last request might not have the bit set.
+        */
+       if (last)
+               rq->cmd_flags |= REQ_END;
 }
 
 static void blk_mq_requeue_request(struct request *rq)
@@ -392,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq)
 
        trace_block_rq_requeue(q, rq);
        clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+
+       rq->cmd_flags &= ~REQ_END;
+
+       if (q->dma_drain_size && blk_rq_bytes(rq))
+               rq->nr_phys_segments--;
 }
 
 struct blk_mq_timeout_data {
@@ -559,19 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 
                rq = list_first_entry(&rq_list, struct request, queuelist);
                list_del_init(&rq->queuelist);
-               blk_mq_start_request(rq);
 
-               /*
-                * Last request in the series. Flag it as such, this
-                * enables drivers to know when IO should be kicked off,
-                * if they don't do it on a per-request basis.
-                *
-                * Note: the flag isn't the only condition drivers
-                * should do kick off. If drive is busy, the last
-                * request might not have the bit set.
-                */
-               if (list_empty(&rq_list))
-                       rq->cmd_flags |= REQ_END;
+               blk_mq_start_request(rq, list_empty(&rq_list));
 
                ret = q->mq_ops->queue_rq(hctx, rq);
                switch (ret) {
@@ -589,8 +608,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
                        break;
                default:
                        pr_err("blk-mq: bad return on queue: %d\n", ret);
-                       rq->errors = -EIO;
                case BLK_MQ_RQ_QUEUE_ERROR:
+                       rq->errors = -EIO;
                        blk_mq_end_io(rq, rq->errors);
                        break;
                }
@@ -693,13 +712,16 @@ static void blk_mq_work_fn(struct work_struct *work)
 }
 
 static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-                                   struct request *rq)
+                                   struct request *rq, bool at_head)
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
 
        trace_block_rq_insert(hctx->queue, rq);
 
-       list_add_tail(&rq->queuelist, &ctx->rq_list);
+       if (at_head)
+               list_add(&rq->queuelist, &ctx->rq_list);
+       else
+               list_add_tail(&rq->queuelist, &ctx->rq_list);
        blk_mq_hctx_mark_pending(hctx, ctx);
 
        /*
@@ -709,7 +731,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 }
 
 void blk_mq_insert_request(struct request_queue *q, struct request *rq,
-                          bool run_queue)
+                          bool at_head, bool run_queue)
 {
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx, *current_ctx;
@@ -728,7 +750,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq,
                        rq->mq_ctx = ctx;
                }
                spin_lock(&ctx->lock);
-               __blk_mq_insert_request(hctx, rq);
+               __blk_mq_insert_request(hctx, rq, at_head);
                spin_unlock(&ctx->lock);
 
                blk_mq_put_ctx(current_ctx);
@@ -760,7 +782,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
 
        /* ctx->cpu might be offline */
        spin_lock(&ctx->lock);
-       __blk_mq_insert_request(hctx, rq);
+       __blk_mq_insert_request(hctx, rq, false);
        spin_unlock(&ctx->lock);
 
        blk_mq_put_ctx(current_ctx);
@@ -798,7 +820,7 @@ static void blk_mq_insert_requests(struct request_queue *q,
                rq = list_first_entry(list, struct request, queuelist);
                list_del_init(&rq->queuelist);
                rq->mq_ctx = ctx;
-               __blk_mq_insert_request(hctx, rq);
+               __blk_mq_insert_request(hctx, rq, false);
        }
        spin_unlock(&ctx->lock);
 
@@ -888,6 +910,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
        blk_queue_bounce(q, &bio);
 
+       if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
+               bio_endio(bio, -EIO);
+               return;
+       }
+
        if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
                return;
 
@@ -950,7 +977,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
                __blk_mq_free_request(hctx, ctx, rq);
        else {
                blk_mq_bio_to_request(rq, bio);
-               __blk_mq_insert_request(hctx, rq);
+               __blk_mq_insert_request(hctx, rq, false);
        }
 
        spin_unlock(&ctx->lock);
@@ -1309,15 +1336,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
                reg->queue_depth = BLK_MQ_MAX_DEPTH;
        }
 
-       /*
-        * Set aside a tag for flush requests.  It will only be used while
-        * another flush request is in progress but outside the driver.
-        *
-        * TODO: only allocate if flushes are supported
-        */
-       reg->queue_depth++;
-       reg->reserved_tags++;
-
        if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
                return ERR_PTR(-EINVAL);
 
@@ -1360,17 +1378,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
        q->mq_ops = reg->ops;
        q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
+       q->sg_reserved_size = INT_MAX;
+
        blk_queue_make_request(q, blk_mq_make_request);
        blk_queue_rq_timed_out(q, reg->ops->timeout);
        if (reg->timeout)
                blk_queue_rq_timeout(q, reg->timeout);
 
+       if (reg->ops->complete)
+               blk_queue_softirq_done(q, reg->ops->complete);
+
        blk_mq_init_flush(q);
        blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
 
-       if (blk_mq_init_hw_queues(q, reg, driver_data))
+       q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
+                               cache_line_size()), GFP_KERNEL);
+       if (!q->flush_rq)
                goto err_hw;
 
+       if (blk_mq_init_hw_queues(q, reg, driver_data))
+               goto err_flush_rq;
+
        blk_mq_map_swqueue(q);
 
        mutex_lock(&all_q_mutex);
@@ -1378,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
        mutex_unlock(&all_q_mutex);
 
        return q;
+
+err_flush_rq:
+       kfree(q->flush_rq);
 err_hw:
        kfree(q->mq_map);
 err_map:
index 5c39179..ed0035c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -22,13 +22,13 @@ struct blk_mq_ctx {
        struct kobject          kobj;
 };
 
-void __blk_mq_end_io(struct request *rq, int error);
-void blk_mq_complete_request(struct request *rq, int error);
+void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_init_flush(struct request_queue *q);
 void blk_mq_drain_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
+void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq);
 
 /*
  * CPU hotplug helpers
index 8095c4a..7500f87 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj)
        if (q->mq_ops)
                blk_mq_free_queue(q);
 
+       kfree(q->flush_rq);
+
        blk_trace_shutdown(q);
 
        bdi_destroy(&q->backing_dev_info);
index bba81c9..d96f706 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req)
        case BLK_EH_HANDLED:
                /* Can we use req->errors here? */
                if (q->mq_ops)
-                       blk_mq_complete_request(req, req->errors);
+                       __blk_mq_complete_request(req);
                else
                        __blk_complete_request(req);
                break;
index c90e1d8..d23b415 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
                        q->flush_queue_delayed = 1;
                        return NULL;
                }
-               if (unlikely(blk_queue_dying(q)) ||
+               if (unlikely(blk_queue_bypass(q)) ||
                    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
                        return NULL;
        }
index 3107282..091b9ea 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -60,7 +60,9 @@ enum {
        NULL_IRQ_NONE           = 0,
        NULL_IRQ_SOFTIRQ        = 1,
        NULL_IRQ_TIMER          = 2,
+};
 
+enum {
        NULL_Q_BIO              = 0,
        NULL_Q_RQ               = 1,
        NULL_Q_MQ               = 2,
@@ -172,18 +174,20 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
 
 static void end_cmd(struct nullb_cmd *cmd)
 {
-       if (cmd->rq) {
-               if (queue_mode == NULL_Q_MQ)
-                       blk_mq_end_io(cmd->rq, 0);
-               else {
-                       INIT_LIST_HEAD(&cmd->rq->queuelist);
-                       blk_end_request_all(cmd->rq, 0);
-               }
-       } else if (cmd->bio)
+       switch (queue_mode)  {
+       case NULL_Q_MQ:
+               blk_mq_end_io(cmd->rq, 0);
+               return;
+       case NULL_Q_RQ:
+               INIT_LIST_HEAD(&cmd->rq->queuelist);
+               blk_end_request_all(cmd->rq, 0);
+               break;
+       case NULL_Q_BIO:
                bio_endio(cmd->bio, 0);
+               break;
+       }
 
-       if (queue_mode != NULL_Q_MQ)
-               free_cmd(cmd);
+       free_cmd(cmd);
 }
 
 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
@@ -195,6 +199,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
        cq = &per_cpu(completion_queues, smp_processor_id());
 
        while ((entry = llist_del_all(&cq->list)) != NULL) {
+               entry = llist_reverse_order(entry);
                do {
                        cmd = container_of(entry, struct nullb_cmd, ll_list);
                        end_cmd(cmd);
@@ -221,61 +226,31 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 
 static void null_softirq_done_fn(struct request *rq)
 {
-       blk_end_request_all(rq, 0);
-}
-
-#ifdef CONFIG_SMP
-
-static void null_ipi_cmd_end_io(void *data)
-{
-       struct completion_queue *cq;
-       struct llist_node *entry, *next;
-       struct nullb_cmd *cmd;
-
-       cq = &per_cpu(completion_queues, smp_processor_id());
-
-       entry = llist_del_all(&cq->list);
-
-       while (entry) {
-               next = entry->next;
-               cmd = llist_entry(entry, struct nullb_cmd, ll_list);
-               end_cmd(cmd);
-               entry = next;
-       }
-}
-
-static void null_cmd_end_ipi(struct nullb_cmd *cmd)
-{
-       struct call_single_data *data = &cmd->csd;
-       int cpu = get_cpu();
-       struct completion_queue *cq = &per_cpu(completion_queues, cpu);
-
-       cmd->ll_list.next = NULL;
-
-       if (llist_add(&cmd->ll_list, &cq->list)) {
-               data->func = null_ipi_cmd_end_io;
-               data->flags = 0;
-               __smp_call_function_single(cpu, data, 0);
-       }
-
-       put_cpu();
+       end_cmd(rq->special);
 }
 
-#endif /* CONFIG_SMP */
-
 static inline void null_handle_cmd(struct nullb_cmd *cmd)
 {
        /* Complete IO by inline, softirq or timer */
        switch (irqmode) {
-       case NULL_IRQ_NONE:
-               end_cmd(cmd);
-               break;
        case NULL_IRQ_SOFTIRQ:
-#ifdef CONFIG_SMP
-               null_cmd_end_ipi(cmd);
-#else
+               switch (queue_mode)  {
+               case NULL_Q_MQ:
+                       blk_mq_complete_request(cmd->rq);
+                       break;
+               case NULL_Q_RQ:
+                       blk_complete_request(cmd->rq);
+                       break;
+               case NULL_Q_BIO:
+                       /*
+                        * XXX: no proper submitting cpu information available.
+                        */
+                       end_cmd(cmd);
+                       break;
+               }
+               break;
+       case NULL_IRQ_NONE:
                end_cmd(cmd);
-#endif
                break;
        case NULL_IRQ_TIMER:
                null_cmd_end_timer(cmd);
@@ -411,6 +386,7 @@ static struct blk_mq_ops null_mq_ops = {
        .queue_rq       = null_queue_rq,
        .map_queue      = blk_mq_map_queue,
        .init_hctx      = null_init_hctx,
+       .complete       = null_softirq_done_fn,
 };
 
 static struct blk_mq_reg null_mq_reg = {
@@ -609,13 +585,6 @@ static int __init null_init(void)
 {
        unsigned int i;
 
-#if !defined(CONFIG_SMP)
-       if (irqmode == NULL_IRQ_SOFTIRQ) {
-               pr_warn("null_blk: softirq completions not available.\n");
-               pr_warn("null_blk: using direct completions.\n");
-               irqmode = NULL_IRQ_NONE;
-       }
-#endif
        if (bs > PAGE_SIZE) {
                pr_warn("null_blk: invalid block size\n");
                pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
index 6a680d4..b1cb3f4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -110,9 +110,9 @@ static int __virtblk_add_req(struct virtqueue *vq,
        return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static inline void virtblk_request_done(struct virtblk_req *vbr)
+static inline void virtblk_request_done(struct request *req)
 {
-       struct request *req = vbr->req;
+       struct virtblk_req *vbr = req->special;
        int error = virtblk_result(vbr);
 
        if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -138,7 +138,7 @@ static void virtblk_done(struct virtqueue *vq)
        do {
                virtqueue_disable_cb(vq);
                while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-                       virtblk_request_done(vbr);
+                       blk_mq_complete_request(vbr->req);
                        req_done = true;
                }
                if (unlikely(virtqueue_is_broken(vq)))
@@ -479,6 +479,7 @@ static struct blk_mq_ops virtio_mq_ops = {
        .map_queue      = blk_mq_map_queue,
        .alloc_hctx     = blk_mq_alloc_single_hw_queue,
        .free_hctx      = blk_mq_free_single_hw_queue,
+       .complete       = virtblk_request_done,
 };
 
 static struct blk_mq_reg virtio_mq_reg = {
index 4b97b86..64c60ed 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -299,7 +299,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
        BUG_ON(num != 0);
 }
 
-static void unmap_purged_grants(struct work_struct *work)
+void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
        pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
 
-       INIT_LIST_HEAD(&blkif->persistent_purge_list);
+       BUG_ON(!list_empty(&blkif->persistent_purge_list));
        root = &blkif->persistent_gnts;
 purge_list:
        foreach_grant_safe(persistent_gnt, n, root, node) {
@@ -420,7 +420,6 @@ finished:
        blkif->vbd.overflow_max_grants = 0;
 
        /* We can defer this work */
-       INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
        schedule_work(&blkif->persistent_purge_work);
        pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
        return;
@@ -625,9 +624,23 @@ purge_gnt_list:
                        print_stats(blkif);
        }
 
-       /* Since we are shutting down remove all pages from the buffer */
-       shrink_free_pagepool(blkif, 0 /* All */);
+       /* Drain pending purge work */
+       flush_work(&blkif->persistent_purge_work);
 
+       if (log_stats)
+               print_stats(blkif);
+
+       blkif->xenblkd = NULL;
+       xen_blkif_put(blkif);
+
+       return 0;
+}
+
+/*
+ * Remove persistent grants and empty the pool of free pages
+ */
+void xen_blkbk_free_caches(struct xen_blkif *blkif)
+{
        /* Free all persistent grant pages */
        if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
                free_persistent_gnts(blkif, &blkif->persistent_gnts,
@@ -636,13 +649,8 @@ purge_gnt_list:
        BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
        blkif->persistent_gnt_c = 0;
 
-       if (log_stats)
-               print_stats(blkif);
-
-       blkif->xenblkd = NULL;
-       xen_blkif_put(blkif);
-
-       return 0;
+       /* Since we are shutting down remove all pages from the buffer */
+       shrink_free_pagepool(blkif, 0 /* All */);
 }
 
 /*
@@ -838,7 +846,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
        struct grant_page **pages = pending_req->indirect_pages;
        struct xen_blkif *blkif = pending_req->blkif;
        int indirect_grefs, rc, n, nseg, i;
-       struct blkif_request_segment_aligned *segments = NULL;
+       struct blkif_request_segment *segments = NULL;
 
        nseg = pending_req->nr_pages;
        indirect_grefs = INDIRECT_PAGES(nseg);
@@ -934,9 +942,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif)
 {
        atomic_set(&blkif->drain, 1);
        do {
-               /* The initial value is one, and one refcnt taken at the
-                * start of the xen_blkif_schedule thread. */
-               if (atomic_read(&blkif->refcnt) <= 2)
+               if (atomic_read(&blkif->inflight) == 0)
                        break;
                wait_for_completion_interruptible_timeout(
                                &blkif->drain_complete, HZ);
@@ -976,17 +982,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
         * the proper response on the ring.
         */
        if (atomic_dec_and_test(&pending_req->pendcnt)) {
-               xen_blkbk_unmap(pending_req->blkif,
+               struct xen_blkif *blkif = pending_req->blkif;
+
+               xen_blkbk_unmap(blkif,
                                pending_req->segments,
                                pending_req->nr_pages);
-               make_response(pending_req->blkif, pending_req->id,
+               make_response(blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
-               xen_blkif_put(pending_req->blkif);
-               if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
-                       if (atomic_read(&pending_req->blkif->drain))
-                               complete(&pending_req->blkif->drain_complete);
+               free_req(blkif, pending_req);
+               /*
+                * Make sure the request is freed before releasing blkif,
+                * or there could be a race between free_req and the
+                * cleanup done in xen_blkif_free during shutdown.
+                *
+                * NB: The fact that we might try to wake up pending_free_wq
+                * before drain_complete (in case there's a drain going on)
+                * it's not a problem with our current implementation
+                * because we can assure there's no thread waiting on
+                * pending_free_wq if there's a drain going on, but it has
+                * to be taken into account if the current model is changed.
+                */
+               if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+                       complete(&blkif->drain_complete);
                }
-               free_req(pending_req->blkif, pending_req);
+               xen_blkif_put(blkif);
        }
 }
 
@@ -1240,6 +1259,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
         */
        xen_blkif_get(blkif);
+       atomic_inc(&blkif->inflight);
 
        for (i = 0; i < nseg; i++) {
                while ((bio == NULL) ||
index 8d88075..be05277 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -57,7 +57,7 @@
 #define MAX_INDIRECT_SEGMENTS 256
 
 #define SEGS_PER_INDIRECT_FRAME \
-       (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+       (PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define MAX_INDIRECT_PAGES \
        ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 #define INDIRECT_PAGES(_segs) \
@@ -278,6 +278,7 @@ struct xen_blkif {
        /* for barrier (drain) requests */
        struct completion       drain_complete;
        atomic_t                drain;
+       atomic_t                inflight;
        /* One thread per one blkif. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;
@@ -376,6 +377,7 @@ int xen_blkif_xenbus_init(void);
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
 int xen_blkif_purge_persistent(void *arg);
+void xen_blkbk_free_caches(struct xen_blkif *blkif);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);
@@ -383,6 +385,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 int xen_blkbk_barrier(struct xenbus_transaction xbt,
                      struct backend_info *be, int state);
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
+void xen_blkbk_unmap_purged_grants(struct work_struct *work);
 
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                                        struct blkif_x86_32_request *src)
index c2014a0..9a547e6 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -125,8 +125,11 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        blkif->persistent_gnts.rb_node = NULL;
        spin_lock_init(&blkif->free_pages_lock);
        INIT_LIST_HEAD(&blkif->free_pages);
+       INIT_LIST_HEAD(&blkif->persistent_purge_list);
        blkif->free_pages_num = 0;
        atomic_set(&blkif->persistent_gnt_in_use, 0);
+       atomic_set(&blkif->inflight, 0);
+       INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
 
        INIT_LIST_HEAD(&blkif->pending_free);
 
@@ -259,6 +262,17 @@ static void xen_blkif_free(struct xen_blkif *blkif)
        if (!atomic_dec_and_test(&blkif->refcnt))
                BUG();
 
+       /* Remove all persistent grants and the cache of ballooned pages. */
+       xen_blkbk_free_caches(blkif);
+
+       /* Make sure everything is drained before shutting down */
+       BUG_ON(blkif->persistent_gnt_c != 0);
+       BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
+       BUG_ON(blkif->free_pages_num != 0);
+       BUG_ON(!list_empty(&blkif->persistent_purge_list));
+       BUG_ON(!list_empty(&blkif->free_pages));
+       BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
        /* Check that there is no request in use */
        list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
                list_del(&req->free_list);
index 8dcfb54..efe1b47 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock);
 #define DEV_NAME       "xvd"   /* name in /dev */
 
 #define SEGS_PER_INDIRECT_FRAME \
-       (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+       (PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define INDIRECT_GREFS(_segs) \
        ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
@@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req)
        unsigned long id;
        unsigned int fsect, lsect;
        int i, ref, n;
-       struct blkif_request_segment_aligned *segments = NULL;
+       struct blkif_request_segment *segments = NULL;
 
        /*
         * Used to store if we are able to queue the request by just using
@@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req)
                        } else {
                                n = i % SEGS_PER_INDIRECT_FRAME;
                                segments[n] =
-                                       (struct blkif_request_segment_aligned) {
+                                       (struct blkif_request_segment) {
                                                        .gref       = ref,
                                                        .first_sect = fsect,
                                                        .last_sect  = lsect };
@@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                break;
 
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
 
+       case XenbusStateClosed:
+               if (dev->state == XenbusStateClosed)
+                       break;
+               /* Missed the backend's Closing state -- fallthrough */
        case XenbusStateClosing:
                blkfront_closing(info);
                break;
index 0c707e4..a4c7306 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -210,7 +210,9 @@ BITMASK(GC_MARK,     struct bucket, gc_mark, 0, 2);
 #define GC_MARK_RECLAIMABLE    0
 #define GC_MARK_DIRTY          1
 #define GC_MARK_METADATA       2
-BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
+#define GC_SECTORS_USED_SIZE   13
+#define MAX_GC_SECTORS_USED    (~(~0ULL << GC_SECTORS_USED_SIZE))
+BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
 BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
 
 #include "journal.h"
index 4f6b594..3f74b4b 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -23,7 +23,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
        for (k = i->start; k < bset_bkey_last(i); k = next) {
                next = bkey_next(k);
 
-               printk(KERN_ERR "block %u key %zi/%u: ", set,
+               printk(KERN_ERR "block %u key %li/%u: ", set,
                       (uint64_t *) k - i->d, i->keys);
 
                if (b->ops->key_dump)
@@ -1185,9 +1185,12 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
        struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
                                                     order);
        if (!out) {
+               struct page *outp;
+
                BUG_ON(order > state->page_order);
 
-               out = page_address(mempool_alloc(state->pool, GFP_NOIO));
+               outp = mempool_alloc(state->pool, GFP_NOIO);
+               out = page_address(outp);
                used_mempool = true;
                order = state->page_order;
        }
index 98cc0a8..5f9c2a6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1167,7 +1167,7 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
                /* guard against overflow */
                SET_GC_SECTORS_USED(g, min_t(unsigned,
                                             GC_SECTORS_USED(g) + KEY_SIZE(k),
-                                            (1 << 14) - 1));
+                                            MAX_GC_SECTORS_USED));
 
                BUG_ON(!GC_SECTORS_USED(g));
        }
@@ -1805,7 +1805,7 @@ static bool btree_insert_key(struct btree *b, struct bkey *k,
 
 static size_t insert_u64s_remaining(struct btree *b)
 {
-       ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys);
+       long ret = bch_btree_keys_u64s_remaining(&b->keys);
 
        /*
         * Might land in the middle of an existing extent and have to split it
index 72cd213..5d5d031 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -353,14 +353,14 @@ static void bch_data_insert_start(struct closure *cl)
        struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
        struct bio *bio = op->bio, *n;
 
-       if (op->bypass)
-               return bch_data_invalidate(cl);
-
        if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
                set_gc_sectors(op->c);
                wake_up_gc(op->c);
        }
 
+       if (op->bypass)
+               return bch_data_invalidate(cl);
+
        /*
         * Journal writes are marked REQ_FLUSH; if the original write was a
         * flush, it'll wait on the journal write.
index c6ab693..d8458d4 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -416,7 +416,7 @@ static int btree_bset_stats(struct btree_op *b_op, struct btree *b)
        return MAP_CONTINUE;
 }
 
-int bch_bset_print_stats(struct cache_set *c, char *buf)
+static int bch_bset_print_stats(struct cache_set *c, char *buf)
 {
        struct bset_stats_op op;
        int ret;
index 0bad24d..0129b78 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_integrity_free);
 
+static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
+{
+       if (bip->bip_slab == BIO_POOL_NONE)
+               return BIP_INLINE_VECS;
+
+       return bvec_nr_vecs(bip->bip_slab);
+}
+
 /**
  * bio_integrity_add_page - Attach integrity metadata
  * @bio:       bio to update
@@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct bio_vec *iv;
 
-       if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
+       if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
                printk(KERN_ERR "%s: bip_vec full\n", __func__);
                return 0;
        }
@@ -226,7 +234,8 @@ unsigned int bio_integrity_tag_size(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_integrity_tag_size);
 
-int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
+static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
+                            int set)
 {
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
index 75c49a3..8754e7b 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -611,7 +611,6 @@ EXPORT_SYMBOL(bio_clone_fast);
 struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
                             struct bio_set *bs)
 {
-       unsigned nr_iovecs = 0;
        struct bvec_iter iter;
        struct bio_vec bv;
        struct bio *bio;
@@ -638,10 +637,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
         *    __bio_clone_fast() anyways.
         */
 
-       bio_for_each_segment(bv, bio_src, iter)
-               nr_iovecs++;
-
-       bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs);
+       bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
        if (!bio)
                return NULL;
 
@@ -650,9 +646,18 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
 
+       if (bio->bi_rw & REQ_DISCARD)
+               goto integrity_clone;
+
+       if (bio->bi_rw & REQ_WRITE_SAME) {
+               bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
+               goto integrity_clone;
+       }
+
        bio_for_each_segment(bv, bio_src, iter)
                bio->bi_io_vec[bio->bi_vcnt++] = bv;
 
+integrity_clone:
        if (bio_integrity(bio_src)) {
                int ret;
 
index 7065452..5a4d39b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -250,6 +250,17 @@ static inline unsigned bio_segments(struct bio *bio)
        struct bio_vec bv;
        struct bvec_iter iter;
 
+       /*
+        * We special case discard/write same, because they interpret bi_size
+        * differently:
+        */
+
+       if (bio->bi_rw & REQ_DISCARD)
+               return 1;
+
+       if (bio->bi_rw & REQ_WRITE_SAME)
+               return 1;
+
        bio_for_each_segment(bv, bio, iter)
                segs++;
 
@@ -332,6 +343,7 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
 
 extern struct bio_set *fs_bio_set;
+unsigned int bio_integrity_tag_size(struct bio *bio);
 
 static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
index 161b231..18ba8a6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -83,6 +83,8 @@ struct blk_mq_ops {
         */
        rq_timed_out_fn         *timeout;
 
+       softirq_done_fn         *complete;
+
        /*
         * Override for hctx allocations (should probably go)
         */
@@ -119,11 +121,12 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
-void blk_mq_insert_request(struct request_queue *, struct request *, bool);
+void blk_mq_insert_request(struct request_queue *, struct request *,
+               bool, bool);
 void blk_mq_run_queues(struct request_queue *q, bool async);
 void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved);
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
 struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
 struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
 
@@ -133,6 +136,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
 
 void blk_mq_end_io(struct request *rq, int error);
 
+void blk_mq_complete_request(struct request *rq);
+
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
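
To make the split completion API above concrete, here is a minimal, hypothetical driver sketch (not taken from this series): the interrupt path hands each finished request to blk_mq_complete_request(), which may redirect it to the submitting CPU, and the per-request work then runs in the driver's ->complete handler, which finishes with blk_mq_end_io(). All my_* names, including the my_next_finished_request() helper and its stub body, are invented for the example; null_blk and virtio_blk in this series follow the same pattern.

        #include <linux/blkdev.h>
        #include <linux/blk-mq.h>
        #include <linux/interrupt.h>

        /* ->complete handler: runs via softirq/IPI, normally on the submitting CPU. */
        static void my_complete_rq(struct request *rq)
        {
                int error = 0;  /* a real driver would decode hardware status here */

                blk_mq_end_io(rq, error);
        }

        /* Hypothetical helper: pop the next hardware-completed request, if any. */
        static struct request *my_next_finished_request(void *dev_id)
        {
                return NULL;    /* stub for the sketch */
        }

        /* Hard-IRQ side: only mark requests complete, defer the heavy lifting. */
        static irqreturn_t my_irq_handler(int irq, void *dev_id)
        {
                struct request *rq;

                while ((rq = my_next_finished_request(dev_id)) != NULL)
                        blk_mq_complete_request(rq);

                return IRQ_HANDLED;
        }

        static struct blk_mq_ops my_mq_ops = {
                /* .queue_rq, .map_queue, ... elided */
                .complete       = my_complete_rq,
        };
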
index 8678c43..4afa4f8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -98,7 +98,7 @@ struct request {
        struct list_head queuelist;
        union {
                struct call_single_data csd;
-               struct work_struct mq_flush_data;
+               struct work_struct mq_flush_work;
        };
 
        struct request_queue *q;
@@ -448,13 +448,8 @@ struct request_queue {
        unsigned long           flush_pending_since;
        struct list_head        flush_queue[2];
        struct list_head        flush_data_in_flight;
-       union {
-               struct request  flush_rq;
-               struct {
-                       spinlock_t mq_flush_lock;
-                       struct work_struct mq_flush_work;
-               };
-       };
+       struct request          *flush_rq;
+       spinlock_t              mq_flush_lock;
 
        struct mutex            sysfs_lock;
 
index ae665ac..32ec05a 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t;
  * it's less than the number provided by the backend. The indirect_grefs field
  * in blkif_request_indirect should be filled by the frontend with the
  * grant references of the pages that are holding the indirect segments.
- * This pages are filled with an array of blkif_request_segment_aligned
- * that hold the information about the segments. The number of indirect
- * pages to use is determined by the maximum number of segments
- * a indirect request contains. Every indirect page can contain a maximum
- * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
- * so to calculate the number of indirect pages to use we have to do
- * ceil(indirect_segments/512).
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
  *
  * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
  * create the "feature-max-indirect-segments" node!
@@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t;
 
 #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
 
-struct blkif_request_segment_aligned {
-       grant_ref_t gref;        /* reference to I/O buffer frame        */
-       /* @first_sect: first sector in frame to transfer (inclusive).   */
-       /* @last_sect: last sector in frame to transfer (inclusive).     */
-       uint8_t     first_sect, last_sect;
-       uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
-} __attribute__((__packed__));
+struct blkif_request_segment {
+               grant_ref_t gref;        /* reference to I/O buffer frame        */
+               /* @first_sect: first sector in frame to transfer (inclusive).   */
+               /* @last_sect: last sector in frame to transfer (inclusive).     */
+               uint8_t     first_sect, last_sect;
+};
 
 struct blkif_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
@@ -151,12 +150,7 @@ struct blkif_request_rw {
 #endif
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment {
-               grant_ref_t gref;        /* reference to I/O buffer frame        */
-               /* @first_sect: first sector in frame to transfer (inclusive).   */
-               /* @last_sect: last sector in frame to transfer (inclusive).     */
-               uint8_t     first_sect, last_sect;
-       } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 } __attribute__((__packed__));
 
 struct blkif_request_discard {
index 7be235f..93d145e 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -54,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr,
 /*
  * Try to steal tags from a remote cpu's percpu freelist.
  *
- * We first check how many percpu freelists have tags - we don't steal tags
- * unless enough percpu freelists have tags on them that it's possible more than
- * half the total tags could be stuck on remote percpu freelists.
+ * We first check how many percpu freelists have tags
  *
  * Then we iterate through the cpus until we find some tags - we don't attempt
  * to find the "best" cpu to steal from, to keep cacheline bouncing to a
@@ -69,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool,
        struct percpu_ida_cpu *remote;
 
        for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
-            cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
-            cpus_have_tags--) {
+            cpus_have_tags; cpus_have_tags--) {
                cpu = cpumask_next(cpu, &pool->cpus_have_tags);
 
                if (cpu >= nr_cpu_ids) {