Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 8 May 2008 17:48:36 +0000 (10:48 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 8 May 2008 17:48:36 +0000 (10:48 -0700)
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  Revert "relay: fix splice problem"
  docbook: fix bio missing parameter
  block: use unitialized_var() in bio_alloc_bioset()
  block: avoid duplicate calls to get_part() in disk stat code
  cfq-iosched: make io priorities inherit CPU scheduling class as well as nice
  block: optimize generic_unplug_device()
  block: get rid of likely/unlikely predictions in merge logic
  vfs: splice remove_suid() cleanup
  cfq-iosched: fix RCU race in the cfq io_context destructor handling
  block: adjust tagging function queue bit locking
  block: sysfs store function needs to grab queue_lock and use queue_flag_*()

14 files changed:
block/blk-core.c
block/blk-ioc.c
block/blk-merge.c
block/blk-sysfs.c
block/blk-tag.c
block/cfq-iosched.c
drivers/block/aoe/aoecmd.c
fs/bio.c
fs/splice.c
include/linux/fs.h
include/linux/genhd.h
include/linux/ioprio.h
kernel/relay.c
mm/filemap.c

index b754a4a..2987fe4 100644 (file)
@@ -54,15 +54,16 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
 
 static void drive_stat_acct(struct request *rq, int new_io)
 {
+       struct hd_struct *part;
        int rw = rq_data_dir(rq);
 
        if (!blk_fs_request(rq) || !rq->rq_disk)
                return;
 
-       if (!new_io) {
-               __all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
-       } else {
-               struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
+       part = get_part(rq->rq_disk, rq->sector);
+       if (!new_io)
+               __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+       else {
                disk_round_stats(rq->rq_disk);
                rq->rq_disk->in_flight++;
                if (part) {
@@ -253,9 +254,11 @@ EXPORT_SYMBOL(__generic_unplug_device);
  **/
 void generic_unplug_device(struct request_queue *q)
 {
-       spin_lock_irq(q->queue_lock);
-       __generic_unplug_device(q);
-       spin_unlock_irq(q->queue_lock);
+       if (blk_queue_plugged(q)) {
+               spin_lock_irq(q->queue_lock);
+               __generic_unplug_device(q);
+               spin_unlock_irq(q->queue_lock);
+       }
 }
 EXPORT_SYMBOL(generic_unplug_device);
 
@@ -1536,10 +1539,11 @@ static int __end_that_request_first(struct request *req, int error,
        }
 
        if (blk_fs_request(req) && req->rq_disk) {
+               struct hd_struct *part = get_part(req->rq_disk, req->sector);
                const int rw = rq_data_dir(req);
 
-               all_stat_add(req->rq_disk, sectors[rw],
-                            nr_bytes >> 9, req->sector);
+               all_stat_add(req->rq_disk, part, sectors[rw],
+                               nr_bytes >> 9, req->sector);
        }
 
        total_bytes = bio_nbytes = 0;
@@ -1725,8 +1729,8 @@ static void end_that_request_last(struct request *req, int error)
                const int rw = rq_data_dir(req);
                struct hd_struct *part = get_part(disk, req->sector);
 
-               __all_stat_inc(disk, ios[rw], req->sector);
-               __all_stat_add(disk, ticks[rw], duration, req->sector);
+               __all_stat_inc(disk, part, ios[rw], req->sector);
+               __all_stat_add(disk, part, ticks[rw], duration, req->sector);
                disk_round_stats(disk);
                disk->in_flight--;
                if (part) {
index e34df7c..012f065 100644 (file)
@@ -41,8 +41,8 @@ int put_io_context(struct io_context *ioc)
                rcu_read_lock();
                if (ioc->aic && ioc->aic->dtor)
                        ioc->aic->dtor(ioc->aic);
-               rcu_read_unlock();
                cfq_dtor(ioc);
+               rcu_read_unlock();
 
                kmem_cache_free(iocontext_cachep, ioc);
                return 1;
index 73b2356..651136a 100644 (file)
@@ -149,9 +149,9 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
                                 struct bio *nxt)
 {
-       if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
+       if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);
-       if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
+       if (!bio_flagged(nxt, BIO_SEG_VALID))
                blk_recount_segments(q, nxt);
        if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
            BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
@@ -312,9 +312,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
                        q->last_merge = NULL;
                return 0;
        }
-       if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
+       if (!bio_flagged(req->biotail, BIO_SEG_VALID))
                blk_recount_segments(q, req->biotail);
-       if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
+       if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);
        len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
        if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
@@ -352,9 +352,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
                return 0;
        }
        len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
-       if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
+       if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);
-       if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
+       if (!bio_flagged(req->bio, BIO_SEG_VALID))
                blk_recount_segments(q, req->bio);
        if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
            !BIOVEC_VIRT_OVERSIZE(len)) {
index e85c401..304ec73 100644 (file)
@@ -146,11 +146,13 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
        unsigned long nm;
        ssize_t ret = queue_var_store(&nm, page, count);
 
+       spin_lock_irq(q->queue_lock);
        if (nm)
-              set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+               queue_flag_set(QUEUE_FLAG_NOMERGES, q);
        else
-              clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+               queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
 
+       spin_unlock_irq(q->queue_lock);
        return ret;
 }
 
index de64e04..32667be 100644 (file)
@@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q)
        __blk_free_tags(bqt);
 
        q->queue_tags = NULL;
-       queue_flag_clear(QUEUE_FLAG_QUEUED, q);
+       queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
 }
 
 /**
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags);
  **/
 void blk_queue_free_tags(struct request_queue *q)
 {
-       queue_flag_clear(QUEUE_FLAG_QUEUED, q);
+       queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
 }
 EXPORT_SYMBOL(blk_queue_free_tags);
 
@@ -171,6 +171,9 @@ EXPORT_SYMBOL(blk_init_tags);
  * @q:  the request queue for the device
  * @depth:  the maximum queue depth supported
  * @tags: the tag to use
+ *
+ * Queue lock must be held here if the function is called to resize an
+ * existing map.
  **/
 int blk_queue_init_tags(struct request_queue *q, int depth,
                        struct blk_queue_tag *tags)
@@ -197,7 +200,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
         * assign it, all done
         */
        q->queue_tags = tags;
-       queue_flag_set(QUEUE_FLAG_QUEUED, q);
+       queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
        INIT_LIST_HEAD(&q->tag_busy_list);
        return 0;
 fail:
index f4e1006..b399c62 100644 (file)
@@ -1142,6 +1142,17 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        kmem_cache_free(cfq_pool, cfqq);
 }
 
+static void
+__call_for_each_cic(struct io_context *ioc,
+                   void (*func)(struct io_context *, struct cfq_io_context *))
+{
+       struct cfq_io_context *cic;
+       struct hlist_node *n;
+
+       hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
+               func(ioc, cic);
+}
+
 /*
  * Call func for each cic attached to this ioc.
  */
@@ -1149,12 +1160,8 @@ static void
 call_for_each_cic(struct io_context *ioc,
                  void (*func)(struct io_context *, struct cfq_io_context *))
 {
-       struct cfq_io_context *cic;
-       struct hlist_node *n;
-
        rcu_read_lock();
-       hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
-               func(ioc, cic);
+       __call_for_each_cic(ioc, func);
        rcu_read_unlock();
 }
 
@@ -1198,7 +1205,7 @@ static void cfq_free_io_context(struct io_context *ioc)
         * should be ok to iterate over the known list, we will see all cic's
         * since no new ones are added.
         */
-       call_for_each_cic(ioc, cic_free_func);
+       __call_for_each_cic(ioc, cic_free_func);
 }
 
 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -1296,10 +1303,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
                printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
        case IOPRIO_CLASS_NONE:
                /*
-                * no prio set, place us in the middle of the BE classes
+                * no prio set, inherit CPU scheduling settings
                 */
                cfqq->ioprio = task_nice_ioprio(tsk);
-               cfqq->ioprio_class = IOPRIO_CLASS_BE;
+               cfqq->ioprio_class = task_nice_ioclass(tsk);
                break;
        case IOPRIO_CLASS_RT:
                cfqq->ioprio = task_ioprio(ioc);
index 8fc429c..41f818b 100644 (file)
@@ -755,11 +755,13 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
 {
        unsigned long n_sect = bio->bi_size >> 9;
        const int rw = bio_data_dir(bio);
+       struct hd_struct *part;
 
-       all_stat_inc(disk, ios[rw], sector);
-       all_stat_add(disk, ticks[rw], duration, sector);
-       all_stat_add(disk, sectors[rw], n_sect, sector);
-       all_stat_add(disk, io_ticks, duration, sector);
+       part = get_part(disk, sector);
+       all_stat_inc(disk, part, ios[rw], sector);
+       all_stat_add(disk, part, ticks[rw], duration, sector);
+       all_stat_add(disk, part, sectors[rw], n_sect, sector);
+       all_stat_add(disk, part, io_ticks, duration, sector);
 }
 
 void
index 799f86d..7856257 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -158,7 +158,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 
                bio_init(bio);
                if (likely(nr_iovecs)) {
-                       unsigned long idx = 0; /* shut up gcc */
+                       unsigned long uninitialized_var(idx);
 
                        bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
                        if (unlikely(!bvl)) {
@@ -963,6 +963,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
  *     @data: pointer to buffer to copy
  *     @len: length in bytes
  *     @gfp_mask: allocation flags for bio and page allocation
+ *     @reading: data direction is READ
  *
  *     copy the kernel address into a bio suitable for io to a block
  *     device. Returns an error pointer in case of error.
index 633f58e..7815003 100644 (file)
@@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 {
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
-       int killsuid, killpriv;
+       struct splice_desc sd = {
+               .total_len = len,
+               .flags = flags,
+               .pos = *ppos,
+               .u.file = out,
+       };
        ssize_t ret;
-       int err = 0;
-
-       killpriv = security_inode_need_killpriv(out->f_path.dentry);
-       killsuid = should_remove_suid(out->f_path.dentry);
-       if (unlikely(killsuid || killpriv)) {
-               mutex_lock(&inode->i_mutex);
-               if (killpriv)
-                       err = security_inode_killpriv(out->f_path.dentry);
-               if (!err && killsuid)
-                       err = __remove_suid(out->f_path.dentry, killsuid);
-               mutex_unlock(&inode->i_mutex);
-               if (err)
-                       return err;
-       }
 
-       ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+       inode_double_lock(inode, pipe->inode);
+       ret = remove_suid(out->f_path.dentry);
+       if (likely(!ret))
+               ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
+       inode_double_unlock(inode, pipe->inode);
        if (ret > 0) {
                unsigned long nr_pages;
 
@@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                 * sync it.
                 */
                if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+                       int err;
+
                        mutex_lock(&inode->i_mutex);
                        err = generic_osync_inode(inode, mapping,
                                                  OSYNC_METADATA|OSYNC_DATA);
@@ -1075,7 +1072,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
        ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
        if (ret > 0)
-               *ppos = sd.pos;
+               *ppos += ret;
 
        return ret;
 }
index 7e0fa9e..f413085 100644 (file)
@@ -1816,7 +1816,6 @@ extern void iget_failed(struct inode *);
 extern void clear_inode(struct inode *);
 extern void destroy_inode(struct inode *);
 extern struct inode *new_inode(struct super_block *);
-extern int __remove_suid(struct dentry *, int);
 extern int should_remove_suid(struct dentry *);
 extern int remove_suid(struct dentry *);
 
index ecd2bf6..e9874e7 100644 (file)
@@ -178,17 +178,17 @@ static inline struct hd_struct *get_part(struct gendisk *gendiskp,
 
 static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)      {
        int i;
+
        for_each_possible_cpu(i)
                memset(per_cpu_ptr(gendiskp->dkstats, i), value,
-                               sizeof (struct disk_stats));
+                               sizeof(struct disk_stats));
 }              
 
 #define __part_stat_add(part, field, addnd)                            \
        (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
 
-#define __all_stat_add(gendiskp, field, addnd, sector)         \
+#define __all_stat_add(gendiskp, part, field, addnd, sector)   \
 ({                                                             \
-       struct hd_struct *part = get_part(gendiskp, sector);    \
        if (part)                                               \
                __part_stat_add(part, field, addnd);            \
        __disk_stat_add(gendiskp, field, addnd);                \
@@ -203,11 +203,13 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
        res;                                                            \
 })
 
-static inline void part_stat_set_all(struct hd_struct *part, int value)        {
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
        int i;
+
        for_each_possible_cpu(i)
                memset(per_cpu_ptr(part->dkstats, i), value,
-                      sizeof(struct disk_stats));
+                               sizeof(struct disk_stats));
 }
                                
 #else /* !CONFIG_SMP */
@@ -223,9 +225,8 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
 #define __part_stat_add(part, field, addnd) \
        (part->dkstats.field += addnd)
 
-#define __all_stat_add(gendiskp, field, addnd, sector)         \
+#define __all_stat_add(gendiskp, part, field, addnd, sector)   \
 ({                                                             \
-       struct hd_struct *part = get_part(gendiskp, sector);    \
        if (part)                                               \
                part->dkstats.field += addnd;                   \
        __disk_stat_add(gendiskp, field, addnd);                \
@@ -276,10 +277,10 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
 #define part_stat_sub(gendiskp, field, subnd) \
                part_stat_add(gendiskp, field, -subnd)
 
-#define all_stat_add(gendiskp, field, addnd, sector)           \
+#define all_stat_add(gendiskp, part, field, addnd, sector)     \
        do {                                                    \
                preempt_disable();                              \
-               __all_stat_add(gendiskp, field, addnd, sector); \
+               __all_stat_add(gendiskp, part, field, addnd, sector);   \
                preempt_enable();                               \
        } while (0)
 
@@ -288,15 +289,15 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
 #define all_stat_dec(gendiskp, field, sector) \
                all_stat_add(gendiskp, field, -1, sector)
 
-#define __all_stat_inc(gendiskp, field, sector) \
-               __all_stat_add(gendiskp, field, 1, sector)
-#define all_stat_inc(gendiskp, field, sector) \
-               all_stat_add(gendiskp, field, 1, sector)
+#define __all_stat_inc(gendiskp, part, field, sector) \
+               __all_stat_add(gendiskp, part, field, 1, sector)
+#define all_stat_inc(gendiskp, part, field, sector) \
+               all_stat_add(gendiskp, part, field, 1, sector)
 
-#define __all_stat_sub(gendiskp, field, subnd, sector) \
-               __all_stat_add(gendiskp, field, -subnd, sector)
-#define all_stat_sub(gendiskp, field, subnd, sector) \
-               all_stat_add(gendiskp, field, -subnd, sector)
+#define __all_stat_sub(gendiskp, part, field, subnd, sector) \
+               __all_stat_add(gendiskp, part, field, -subnd, sector)
+#define all_stat_sub(gendiskp, part, field, subnd, sector) \
+               all_stat_add(gendiskp, part, field, -subnd, sector)
 
 /* Inlines to alloc and free disk stats in struct gendisk */
 #ifdef  CONFIG_SMP
index 2a3bb1b..f98a656 100644 (file)
@@ -68,6 +68,20 @@ static inline int task_nice_ioprio(struct task_struct *task)
 }
 
 /*
+ * This is for the case where the task hasn't asked for a specific IO class.
+ * Check for idle and rt task process, and return appropriate IO class.
+ */
+static inline int task_nice_ioclass(struct task_struct *task)
+{
+       if (task->policy == SCHED_IDLE)
+               return IOPRIO_CLASS_IDLE;
+       else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR)
+               return IOPRIO_CLASS_RT;
+       else
+               return IOPRIO_CLASS_BE;
+}
+
+/*
  * For inheritance, return the highest of the two given priorities
  */
 extern int ioprio_best(unsigned short aprio, unsigned short bprio);
index 7de644c..bc24dcd 100644 (file)
@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in,
        ret = 0;
        spliced = 0;
 
-       while (len && !spliced) {
+       while (len) {
                ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
                if (ret < 0)
                        break;
index 239d361..2dead9a 100644 (file)
@@ -1655,7 +1655,7 @@ int should_remove_suid(struct dentry *dentry)
 }
 EXPORT_SYMBOL(should_remove_suid);
 
-int __remove_suid(struct dentry *dentry, int kill)
+static int __remove_suid(struct dentry *dentry, int kill)
 {
        struct iattr newattrs;