Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Jan 2011 18:45:01 +0000 (10:45 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Jan 2011 18:45:01 +0000 (10:45 -0800)
* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits)
  block: ensure that completion error gets properly traced
  blktrace: add missing probe argument to block_bio_complete
  block cfq: don't use atomic_t for cfq_group
  block cfq: don't use atomic_t for cfq_queue
  block: trace event block fix unassigned field
  block: add internal hd part table references
  block: fix accounting bug on cross partition merges
  kref: add kref_test_and_get
  bio-integrity: mark kintegrityd_wq highpri and CPU intensive
  block: make kblockd_workqueue smarter
  Revert "sd: implement sd_check_events()"
  block: Clean up exit_io_context() source code.
  Fix compile warnings due to missing removal of a 'ret' variable
  fs/block: type signature of major_to_index(int) to major_to_index(unsigned)
  block: convert !IS_ERR(p) && p to !IS_ERR_NOR_NULL(p)
  cfq-iosched: don't check cfqg in choose_service_tree()
  fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors
  cdrom: export cdrom_check_events()
  sd: implement sd_check_events()
  sr: implement sr_check_events()
  ...

25 files changed:
1  2 
block/blk-merge.c
block/cfq-iosched.c
block/ioctl.c
drivers/md/dm-table.c
drivers/md/md.c
drivers/scsi/scsi_lib.c
drivers/scsi/sd.c
fs/block_dev.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/char_dev.c
fs/ext3/super.c
fs/ext4/super.c
fs/gfs2/ops_fstype.c
fs/nfsd/vfs.c
fs/nilfs2/super.c
fs/ocfs2/cluster/heartbeat.c
fs/reiserfs/journal.c
fs/splice.c
fs/super.c
fs/xfs/linux-2.6/xfs_super.c
include/linux/blkdev.h
include/linux/fs.h
include/scsi/scsi.h
kernel/power/swap.c

diff --combined block/blk-merge.c
@@@ -21,7 -21,7 +21,7 @@@ static unsigned int __blk_recalc_rq_seg
                return 0;
  
        fbio = bio;
 -      cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 +      cluster = blk_queue_cluster(q);
        seg_size = 0;
        nr_phys_segs = 0;
        for_each_bio(bio) {
@@@ -87,7 -87,7 +87,7 @@@ EXPORT_SYMBOL(blk_recount_segments)
  static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
                                   struct bio *nxt)
  {
 -      if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 +      if (!blk_queue_cluster(q))
                return 0;
  
        if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
@@@ -123,7 -123,7 +123,7 @@@ int blk_rq_map_sg(struct request_queue 
        int nsegs, cluster;
  
        nsegs = 0;
 -      cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 +      cluster = blk_queue_cluster(q);
  
        /*
         * for each bio in rq
@@@ -351,11 -351,12 +351,12 @@@ static void blk_account_io_merge(struc
                int cpu;
  
                cpu = part_stat_lock();
-               part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+               part = req->part;
  
                part_round_stats(cpu, part);
                part_dec_in_flight(part, rq_data_dir(req));
  
+               hd_struct_put(part);
                part_stat_unlock();
        }
  }
diff --combined block/cfq-iosched.c
@@@ -87,7 -87,6 +87,6 @@@ struct cfq_rb_root 
        unsigned count;
        unsigned total_weight;
        u64 min_vdisktime;
-       struct rb_node *active;
  };
  #define CFQ_RB_ROOT   (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
                        .count = 0, .min_vdisktime = 0, }
@@@ -97,7 -96,7 +96,7 @@@
   */
  struct cfq_queue {
        /* reference count */
-       atomic_t ref;
+       int ref;
        /* various state flags, see below */
        unsigned int flags;
        /* parent cfq_data */
@@@ -180,7 -179,6 +179,6 @@@ struct cfq_group 
        /* group service_tree key */
        u64 vdisktime;
        unsigned int weight;
-       bool on_st;
  
        /* number of cfqq currently on this group */
        int nr_cfqq;
        struct blkio_group blkg;
  #ifdef CONFIG_CFQ_GROUP_IOSCHED
        struct hlist_node cfqd_node;
-       atomic_t ref;
+       int ref;
  #endif
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
@@@ -563,11 -561,6 +561,6 @@@ static void update_min_vdisktime(struc
        u64 vdisktime = st->min_vdisktime;
        struct cfq_group *cfqg;
  
-       if (st->active) {
-               cfqg = rb_entry_cfqg(st->active);
-               vdisktime = cfqg->vdisktime;
-       }
        if (st->left) {
                cfqg = rb_entry_cfqg(st->left);
                vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@@ -646,11 -639,11 +639,11 @@@ cfq_set_prio_slice(struct cfq_data *cfq
  static inline bool cfq_slice_used(struct cfq_queue *cfqq)
  {
        if (cfq_cfqq_slice_new(cfqq))
-               return 0;
+               return false;
        if (time_before(jiffies, cfqq->slice_end))
-               return 0;
+               return false;
  
-       return 1;
+       return true;
  }
  
  /*
@@@ -869,7 -862,7 +862,7 @@@ cfq_group_service_tree_add(struct cfq_d
        struct rb_node *n;
  
        cfqg->nr_cfqq++;
-       if (cfqg->on_st)
+       if (!RB_EMPTY_NODE(&cfqg->rb_node))
                return;
  
        /*
                cfqg->vdisktime = st->min_vdisktime;
  
        __cfq_group_service_tree_add(st, cfqg);
-       cfqg->on_st = true;
        st->total_weight += cfqg->weight;
  }
  
@@@ -894,9 -886,6 +886,6 @@@ cfq_group_service_tree_del(struct cfq_d
  {
        struct cfq_rb_root *st = &cfqd->grp_service_tree;
  
-       if (st->active == &cfqg->rb_node)
-               st->active = NULL;
        BUG_ON(cfqg->nr_cfqq < 1);
        cfqg->nr_cfqq--;
  
                return;
  
        cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-       cfqg->on_st = false;
        st->total_weight -= cfqg->weight;
        if (!RB_EMPTY_NODE(&cfqg->rb_node))
                cfq_rb_erase(&cfqg->rb_node, st);
@@@ -1026,11 -1014,11 +1014,11 @@@ cfq_find_alloc_cfqg(struct cfq_data *cf
         * elevator which will be dropped by either elevator exit
         * or cgroup deletion path depending on who is exiting first.
         */
-       atomic_set(&cfqg->ref, 1);
+       cfqg->ref = 1;
  
        /*
         * Add group onto cgroup list. It might happen that bdi->dev is
 -       * not initiliazed yet. Initialize this new group without major
 +       * not initialized yet. Initialize this new group without major
         * and minor info and this info will be filled in once a new thread
         * comes for IO. See code above.
         */
@@@ -1071,7 -1059,7 +1059,7 @@@ static struct cfq_group *cfq_get_cfqg(s
  
  static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
  {
-       atomic_inc(&cfqg->ref);
+       cfqg->ref++;
        return cfqg;
  }
  
@@@ -1083,7 -1071,7 +1071,7 @@@ static void cfq_link_cfqq_cfqg(struct c
  
        cfqq->cfqg = cfqg;
        /* cfqq reference on cfqg */
-       atomic_inc(&cfqq->cfqg->ref);
+       cfqq->cfqg->ref++;
  }
  
  static void cfq_put_cfqg(struct cfq_group *cfqg)
        struct cfq_rb_root *st;
        int i, j;
  
-       BUG_ON(atomic_read(&cfqg->ref) <= 0);
-       if (!atomic_dec_and_test(&cfqg->ref))
+       BUG_ON(cfqg->ref <= 0);
+       cfqg->ref--;
+       if (cfqg->ref)
                return;
        for_each_cfqg_st(cfqg, i, j, st)
-               BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+               BUG_ON(!RB_EMPTY_ROOT(&st->rb));
        kfree(cfqg);
  }
  
@@@ -1200,7 -1189,7 +1189,7 @@@ static void cfq_service_tree_add(struc
                        cfq_group_service_tree_del(cfqd, cfqq->cfqg);
                cfqq->orig_cfqg = cfqq->cfqg;
                cfqq->cfqg = &cfqd->root_group;
-               atomic_inc(&cfqd->root_group.ref);
+               cfqd->root_group.ref++;
                group_changed = 1;
        } else if (!cfqd->cfq_group_isolation
                   && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
@@@ -1687,9 -1676,6 +1676,6 @@@ __cfq_slice_expired(struct cfq_data *cf
        if (cfqq == cfqd->active_queue)
                cfqd->active_queue = NULL;
  
-       if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
-               cfqd->grp_service_tree.active = NULL;
        if (cfqd->active_cic) {
                put_io_context(cfqd->active_cic->ioc);
                cfqd->active_cic = NULL;
@@@ -1901,10 -1887,10 +1887,10 @@@ static bool cfq_should_idle(struct cfq_
         * in their service tree.
         */
        if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
-               return 1;
+               return true;
        cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
                        service_tree->count);
-       return 0;
+       return false;
  }
  
  static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@@ -2040,7 -2026,7 +2026,7 @@@ static int cfqq_process_refs(struct cfq
        int process_refs, io_refs;
  
        io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
-       process_refs = atomic_read(&cfqq->ref) - io_refs;
+       process_refs = cfqq->ref - io_refs;
        BUG_ON(process_refs < 0);
        return process_refs;
  }
@@@ -2080,10 -2066,10 +2066,10 @@@ static void cfq_setup_merge(struct cfq_
         */
        if (new_process_refs >= process_refs) {
                cfqq->new_cfqq = new_cfqq;
-               atomic_add(process_refs, &new_cfqq->ref);
+               new_cfqq->ref += process_refs;
        } else {
                new_cfqq->new_cfqq = cfqq;
-               atomic_add(new_process_refs, &cfqq->ref);
+               cfqq->ref += new_process_refs;
        }
  }
  
@@@ -2116,12 -2102,7 +2102,7 @@@ static void choose_service_tree(struct 
        unsigned count;
        struct cfq_rb_root *st;
        unsigned group_slice;
-       if (!cfqg) {
-               cfqd->serving_prio = IDLE_WORKLOAD;
-               cfqd->workload_expires = jiffies + 1;
-               return;
-       }
+       enum wl_prio_t original_prio = cfqd->serving_prio;
  
        /* Choose next priority. RT > BE > IDLE */
        if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
                return;
        }
  
+       if (original_prio != cfqd->serving_prio)
+               goto new_workload;
        /*
         * For RT and BE, we have to choose also the type
         * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
        if (count && !time_after(jiffies, cfqd->workload_expires))
                return;
  
+ new_workload:
        /* otherwise select new workload type */
        cfqd->serving_type =
                cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@@ -2199,7 -2184,6 +2184,6 @@@ static struct cfq_group *cfq_get_next_c
        if (RB_EMPTY_ROOT(&st->rb))
                return NULL;
        cfqg = cfq_rb_first_group(st);
-       st->active = &cfqg->rb_node;
        update_min_vdisktime(st);
        return cfqg;
  }
@@@ -2293,6 -2277,17 +2277,17 @@@ static struct cfq_queue *cfq_select_que
                goto keep_queue;
        }
  
+       /*
+        * This is a deep seek queue, but the device is much faster than
+        * the queue can deliver, don't idle
+        **/
+       if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
+           (cfq_cfqq_slice_new(cfqq) ||
+           (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+               cfq_clear_cfqq_deep(cfqq);
+               cfq_clear_cfqq_idle_window(cfqq);
+       }
        if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
                cfqq = NULL;
                goto keep_queue;
@@@ -2367,12 -2362,12 +2362,12 @@@ static inline bool cfq_slice_used_soon(
  {
        /* the queue hasn't finished any request, can't estimate */
        if (cfq_cfqq_slice_new(cfqq))
-               return 1;
+               return true;
        if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
                cfqq->slice_end))
-               return 1;
+               return true;
  
-       return 0;
+       return false;
  }
  
  static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@@ -2538,9 -2533,10 +2533,10 @@@ static void cfq_put_queue(struct cfq_qu
        struct cfq_data *cfqd = cfqq->cfqd;
        struct cfq_group *cfqg, *orig_cfqg;
  
-       BUG_ON(atomic_read(&cfqq->ref) <= 0);
+       BUG_ON(cfqq->ref <= 0);
  
-       if (!atomic_dec_and_test(&cfqq->ref))
+       cfqq->ref--;
+       if (cfqq->ref)
                return;
  
        cfq_log_cfqq(cfqd, cfqq, "put_queue");
@@@ -2843,7 -2839,7 +2839,7 @@@ static void cfq_init_cfqq(struct cfq_da
        RB_CLEAR_NODE(&cfqq->p_node);
        INIT_LIST_HEAD(&cfqq->fifo);
  
-       atomic_set(&cfqq->ref, 0);
+       cfqq->ref = 0;
        cfqq->cfqd = cfqd;
  
        cfq_mark_cfqq_prio_changed(cfqq);
@@@ -2979,11 -2975,11 +2975,11 @@@ cfq_get_queue(struct cfq_data *cfqd, bo
         * pin the queue now that it's allocated, scheduler exit will prune it
         */
        if (!is_sync && !(*async_cfqq)) {
-               atomic_inc(&cfqq->ref);
+               cfqq->ref++;
                *async_cfqq = cfqq;
        }
  
-       atomic_inc(&cfqq->ref);
+       cfqq->ref++;
        return cfqq;
  }
  
@@@ -3265,6 -3261,10 +3261,10 @@@ cfq_should_preempt(struct cfq_data *cfq
        if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
                return true;
  
+       /* An idle queue should not be idle now for some reason */
+       if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
+               return true;
        if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
                return false;
  
@@@ -3681,13 -3681,13 +3681,13 @@@ new_queue
        }
  
        cfqq->allocated[rw]++;
-       atomic_inc(&cfqq->ref);
-       spin_unlock_irqrestore(q->queue_lock, flags);
+       cfqq->ref++;
        rq->elevator_private = cic;
        rq->elevator_private2 = cfqq;
        rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
+       spin_unlock_irqrestore(q->queue_lock, flags);
        return 0;
  
  queue_fail:
@@@ -3862,6 -3862,10 +3862,10 @@@ static void *cfq_init_queue(struct requ
        if (!cfqd)
                return NULL;
  
+       /*
+        * Don't need take queue_lock in the routine, since we are
+        * initializing the ioscheduler, and nobody is using cfqd
+        */
        cfqd->cic_index = i;
  
        /* Init root service tree */
         * Take a reference to root group which we never drop. This is just
         * to make sure that cfq_put_cfqg() does not try to kfree root group
         */
-       atomic_set(&cfqg->ref, 1);
+       cfqg->ref = 1;
        rcu_read_lock();
        cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
                                        (void *)cfqd, 0);
         * will not attempt to free it.
         */
        cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
-       atomic_inc(&cfqd->oom_cfqq.ref);
+       cfqd->oom_cfqq.ref++;
        cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
  
        INIT_LIST_HEAD(&cfqd->cic_list);
diff --combined block/ioctl.c
@@@ -5,6 -5,7 +5,6 @@@
  #include <linux/hdreg.h>
  #include <linux/backing-dev.h>
  #include <linux/buffer_head.h>
 -#include <linux/smp_lock.h>
  #include <linux/blktrace_api.h>
  #include <asm/uaccess.h>
  
@@@ -294,11 -295,12 +294,12 @@@ int blkdev_ioctl(struct block_device *b
                        return -EINVAL;
                if (get_user(n, (int __user *) arg))
                        return -EFAULT;
-               if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0)
+               if (!(mode & FMODE_EXCL) &&
+                   blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
                        return -EBUSY;
                ret = set_blocksize(bdev, n);
                if (!(mode & FMODE_EXCL))
-                       bd_release(bdev);
+                       blkdev_put(bdev, mode | FMODE_EXCL);
                return ret;
        case BLKPG:
                ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
diff --combined drivers/md/dm-table.c
@@@ -325,15 -325,18 +325,18 @@@ static int open_dev(struct dm_dev_inter
  
        BUG_ON(d->dm_dev.bdev);
  
-       bdev = open_by_devnum(dev, d->dm_dev.mode);
+       bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);
-       r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
-       if (r)
-               blkdev_put(bdev, d->dm_dev.mode);
-       else
-               d->dm_dev.bdev = bdev;
-       return r;
+       r = bd_link_disk_holder(bdev, dm_disk(md));
+       if (r) {
+               blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL);
+               return r;
+       }
+       d->dm_dev.bdev = bdev;
+       return 0;
  }
  
  /*
@@@ -344,8 -347,7 +347,7 @@@ static void close_dev(struct dm_dev_int
        if (!d->dm_dev.bdev)
                return;
  
-       bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
-       blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
+       blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
        d->dm_dev.bdev = NULL;
  }
  
@@@ -517,8 -519,9 +519,8 @@@ int dm_set_device_limits(struct dm_targ
         */
  
        if (q->merge_bvec_fn && !ti->type->merge)
 -              limits->max_sectors =
 -                      min_not_zero(limits->max_sectors,
 -                                   (unsigned int) (PAGE_SIZE >> 9));
 +              blk_limits_max_hw_sectors(limits,
 +                                        (unsigned int) (PAGE_SIZE >> 9));
        return 0;
  }
  EXPORT_SYMBOL_GPL(dm_set_device_limits);
@@@ -1130,6 -1133,11 +1132,6 @@@ void dm_table_set_restrictions(struct d
         */
        q->limits = *limits;
  
 -      if (limits->no_cluster)
 -              queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 -      else
 -              queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 -
        if (!dm_table_supports_discards(t))
                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
        else
diff --combined drivers/md/md.c
@@@ -371,15 -371,10 +371,15 @@@ static void md_end_flush(struct bio *bi
        bio_put(bio);
  }
  
 -static void submit_flushes(mddev_t *mddev)
 +static void md_submit_flush_data(struct work_struct *ws);
 +
 +static void submit_flushes(struct work_struct *ws)
  {
 +      mddev_t *mddev = container_of(ws, mddev_t, flush_work);
        mdk_rdev_t *rdev;
  
 +      INIT_WORK(&mddev->flush_work, md_submit_flush_data);
 +      atomic_set(&mddev->flush_pending, 1);
        rcu_read_lock();
        list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk >= 0 &&
                        rdev_dec_pending(rdev, mddev);
                }
        rcu_read_unlock();
 +      if (atomic_dec_and_test(&mddev->flush_pending))
 +              queue_work(md_wq, &mddev->flush_work);
  }
  
  static void md_submit_flush_data(struct work_struct *ws)
        mddev_t *mddev = container_of(ws, mddev_t, flush_work);
        struct bio *bio = mddev->flush_bio;
  
 -      atomic_set(&mddev->flush_pending, 1);
 -
        if (bio->bi_size == 0)
                /* an empty barrier - all done */
                bio_endio(bio, 0);
                if (mddev->pers->make_request(mddev, bio))
                        generic_make_request(bio);
        }
 -      if (atomic_dec_and_test(&mddev->flush_pending)) {
 -              mddev->flush_bio = NULL;
 -              wake_up(&mddev->sb_wait);
 -      }
 +
 +      mddev->flush_bio = NULL;
 +      wake_up(&mddev->sb_wait);
  }
  
  void md_flush_request(mddev_t *mddev, struct bio *bio)
        mddev->flush_bio = bio;
        spin_unlock_irq(&mddev->write_lock);
  
 -      atomic_set(&mddev->flush_pending, 1);
 -      INIT_WORK(&mddev->flush_work, md_submit_flush_data);
 -
 -      submit_flushes(mddev);
 -
 -      if (atomic_dec_and_test(&mddev->flush_pending))
 -              queue_work(md_wq, &mddev->flush_work);
 +      INIT_WORK(&mddev->flush_work, submit_flushes);
 +      queue_work(md_wq, &mddev->flush_work);
  }
  EXPORT_SYMBOL(md_flush_request);
  
@@@ -1336,7 -1337,7 +1336,7 @@@ super_90_rdev_size_change(mdk_rdev_t *r
        md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
                       rdev->sb_page);
        md_super_wait(rdev->mddev);
 -      return num_sectors / 2; /* kB for sysfs */
 +      return num_sectors;
  }
  
  
@@@ -1703,7 -1704,7 +1703,7 @@@ super_1_rdev_size_change(mdk_rdev_t *rd
        md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
                       rdev->sb_page);
        md_super_wait(rdev->mddev);
 -      return num_sectors / 2; /* kB for sysfs */
 +      return num_sectors;
  }
  
  static struct super_type super_types[] = {
@@@ -1879,7 -1880,7 +1879,7 @@@ static int bind_rdev_to_array(mdk_rdev_
        rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
  
        list_add_rcu(&rdev->same_set, &mddev->disks);
-       bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+       bd_link_disk_holder(rdev->bdev, mddev->gendisk);
  
        /* May as well allow recovery to be retried once */
        mddev->recovery_disabled = 0;
@@@ -1906,7 -1907,6 +1906,6 @@@ static void unbind_rdev_from_array(mdk_
                MD_BUG();
                return;
        }
-       bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
        list_del_rcu(&rdev->same_set);
        printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
        rdev->mddev = NULL;
@@@ -1934,19 -1934,13 +1933,13 @@@ static int lock_rdev(mdk_rdev_t *rdev, 
        struct block_device *bdev;
        char b[BDEVNAME_SIZE];
  
-       bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+       bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+                                shared ? (mdk_rdev_t *)lock_rdev : rdev);
        if (IS_ERR(bdev)) {
                printk(KERN_ERR "md: could not open %s.\n",
                        __bdevname(dev, b));
                return PTR_ERR(bdev);
        }
-       err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
-       if (err) {
-               printk(KERN_ERR "md: could not bd_claim %s.\n",
-                       bdevname(bdev, b));
-               blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
-               return err;
-       }
        if (!shared)
                set_bit(AllReserved, &rdev->flags);
        rdev->bdev = bdev;
@@@ -1959,8 -1953,7 +1952,7 @@@ static void unlock_rdev(mdk_rdev_t *rde
        rdev->bdev = NULL;
        if (!bdev)
                MD_BUG();
-       bd_release(bdev);
-       blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+       blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  }
  
  void md_autodetect_dev(dev_t dev);
@@@ -4295,6 -4288,9 +4287,6 @@@ static int md_alloc(dev_t dev, char *na
                goto abort;
        mddev->queue->queuedata = mddev;
  
 -      /* Can be unlocked because the queue is new: no concurrency */
 -      queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
 -
        blk_queue_make_request(mddev->queue, md_make_request);
  
        disk = alloc_disk(1 << shift);
        if (mddev->kobj.sd &&
            sysfs_create_group(&mddev->kobj, &md_bitmap_group))
                printk(KERN_DEBUG "pointless warning\n");
 +
 +      blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
   abort:
        mutex_unlock(&disks_mutex);
        if (!error && mddev->kobj.sd) {
@@@ -5156,7 -5150,7 +5148,7 @@@ static int add_new_disk(mddev_t * mddev
                                PTR_ERR(rdev));
                        return PTR_ERR(rdev);
                }
 -              /* set save_raid_disk if appropriate */
 +              /* set saved_raid_disk if appropriate */
                if (!mddev->persistent) {
                        if (info->state & (1<<MD_DISK_SYNC)  &&
                            info->raid_disk < mddev->raid_disks)
                } else
                        super_types[mddev->major_version].
                                validate_super(mddev, rdev);
 -              rdev->saved_raid_disk = rdev->raid_disk;
 +              if (test_bit(In_sync, &rdev->flags))
 +                      rdev->saved_raid_disk = rdev->raid_disk;
 +              else
 +                      rdev->saved_raid_disk = -1;
  
                clear_bit(In_sync, &rdev->flags); /* just to be sure */
                if (info->state & (1<<MD_DISK_WRITEMOSTLY))
@@@ -6041,8 -6032,9 +6033,8 @@@ static int md_thread(void * arg
                         || kthread_should_stop(),
                         thread->timeout);
  
 -              clear_bit(THREAD_WAKEUP, &thread->flags);
 -
 -              thread->run(thread->mddev);
 +              if (test_and_clear_bit(THREAD_WAKEUP, &thread->flags))
 +                      thread->run(thread->mddev);
        }
  
        return 0;
diff --combined drivers/scsi/scsi_lib.c
@@@ -1278,10 -1278,11 +1278,10 @@@ static inline int scsi_target_queue_rea
        }
  
        if (scsi_target_is_busy(starget)) {
 -              if (list_empty(&sdev->starved_entry)) {
 +              if (list_empty(&sdev->starved_entry))
                        list_add_tail(&sdev->starved_entry,
                                      &shost->starved_list);
 -                      return 0;
 -              }
 +              return 0;
        }
  
        /* We're OK to process the command, so we can't be starved */
@@@ -1402,6 -1403,11 +1402,6 @@@ static void scsi_softirq_done(struct re
  
        INIT_LIST_HEAD(&cmd->eh_entry);
  
 -      /*
 -       * Set the serial numbers back to zero
 -       */
 -      cmd->serial_number = 0;
 -
        atomic_inc(&cmd->device->iodone_cnt);
        if (cmd->result)
                atomic_inc(&cmd->device->ioerr_cnt);
@@@ -1636,8 -1642,9 +1636,8 @@@ struct request_queue *__scsi_alloc_queu
  
        blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
  
 -      /* New queue, no concurrency on queue_flags */
        if (!shost->use_clustering)
 -              queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 +              q->limits.cluster = 0;
  
        /*
         * set a reasonable default alignment on word boundaries: the
@@@ -1977,8 -1984,7 +1977,7 @@@ EXPORT_SYMBOL(scsi_mode_sense)
   *            in.
   *
   *    Returns zero if unsuccessful or an error if TUR failed.  For
-  *    removable media, a return of NOT_READY or UNIT_ATTENTION is
-  *    translated to success, with the ->changed flag updated.
+  *    removable media, UNIT_ATTENTION sets ->changed flag.
   **/
  int
  scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
        } while (scsi_sense_valid(sshdr) &&
                 sshdr->sense_key == UNIT_ATTENTION && --retries);
  
-       if (!sshdr)
-               /* could not allocate sense buffer, so can't process it */
-               return result;
-       if (sdev->removable && scsi_sense_valid(sshdr) &&
-           (sshdr->sense_key == UNIT_ATTENTION ||
-            sshdr->sense_key == NOT_READY)) {
-               sdev->changed = 1;
-               result = 0;
-       }
        if (!sshdr_external)
                kfree(sshdr);
        return result;
diff --combined drivers/scsi/sd.c
@@@ -46,6 -46,7 +46,6 @@@
  #include <linux/blkdev.h>
  #include <linux/blkpg.h>
  #include <linux/delay.h>
 -#include <linux/smp_lock.h>
  #include <linux/mutex.h>
  #include <linux/string_helpers.h>
  #include <linux/async.h>
@@@ -583,7 -584,7 +583,7 @@@ static int sd_prep_fn(struct request_qu
                 * quietly refuse to do anything to a changed disc until 
                 * the changed bit has been reset
                 */
 -              /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */
 +              /* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */
                goto out;
        }
  
@@@ -1023,6 -1024,7 +1023,6 @@@ static int sd_media_changed(struct gend
         */
        if (!scsi_device_online(sdp)) {
                set_media_not_present(sdkp);
 -              retval = 1;
                goto out;
        }
  
                                              sshdr);
        }
  
-       /*
-        * Unable to test, unit probably not ready.   This usually
-        * means there is no disc in the drive.  Mark as changed,
-        * and we will figure it out later once the drive is
-        * available again.
-        */
-       if (retval || (scsi_sense_valid(sshdr) &&
-                      /* 0x3a is medium not present */
-                      sshdr->asc == 0x3a)) {
+       if (retval) {
                set_media_not_present(sdkp);
 -              retval = 1;
                goto out;
        }
  
         */
        sdkp->media_present = 1;
  
 -      retval = sdp->changed;
 -      sdp->changed = 0;
  out:
 -      if (retval != sdkp->previous_state)
 +      /*
 +       * Report a media change under the following conditions:
 +       *
 +       *      Medium is present now and wasn't present before.
 +      *      Medium was present before and isn't present now.
 +       *      Medium was present at all times, but it changed while
 +       *              we weren't looking (sdp->changed is set).
 +       *
 +       * If there was no medium before and there is no medium now then
 +       * don't report a change, even if a medium was inserted and removed
 +       * while we weren't looking.
 +       */
 +      retval = (sdkp->media_present != sdkp->previous_state ||
 +                      (sdkp->media_present && sdp->changed));
 +      if (retval)
                sdev_evt_send_simple(sdp, SDEV_EVT_MEDIA_CHANGE, GFP_KERNEL);
 -      sdkp->previous_state = retval;
 +      sdkp->previous_state = sdkp->media_present;
 +
 +      /* sdp->changed indicates medium was changed or is not present */
 +      sdp->changed = !sdkp->media_present;
        kfree(sshdr);
        return retval;
  }
@@@ -1188,12 -1168,6 +1180,12 @@@ static unsigned int sd_completed_bytes(
        u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
        u64 bad_lba;
        int info_valid;
 +      /*
 +       * resid is optional but mostly filled in.  When it's unused,
 +       * its value is zero, so we assume the whole buffer transferred
 +       */
 +      unsigned int transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd);
 +      unsigned int good_bytes;
  
        if (scmd->request->cmd_type != REQ_TYPE_FS)
                return 0;
        /* This computation should always be done in terms of
         * the resolution of the device's medium.
         */
 -      return (bad_lba - start_lba) * scmd->device->sector_size;
 +      good_bytes = (bad_lba - start_lba) * scmd->device->sector_size;
 +      return min(good_bytes, transferred);
  }
  
  /**
@@@ -1922,14 -1895,10 +1914,14 @@@ sd_read_cache_type(struct scsi_disk *sd
        int old_rcd = sdkp->RCD;
        int old_dpofua = sdkp->DPOFUA;
  
 -      if (sdp->skip_ms_page_8)
 -              goto defaults;
 -
 -      if (sdp->type == TYPE_RBC) {
 +      if (sdp->skip_ms_page_8) {
 +              if (sdp->type == TYPE_RBC)
 +                      goto defaults;
 +              else {
 +                      modepage = 0x3F;
 +                      dbd = 0;
 +              }
 +      } else if (sdp->type == TYPE_RBC) {
                modepage = 6;
                dbd = 8;
        } else {
         */
        if (len < 3)
                goto bad_sense;
 -      if (len > 20)
 -              len = 20;
 -
 -      /* Take headers and block descriptors into account */
 -      len += data.header_length + data.block_descriptor_length;
 -      if (len > SD_BUF_SIZE)
 -              goto bad_sense;
 +      else if (len > SD_BUF_SIZE) {
 +              sd_printk(KERN_NOTICE, sdkp, "Truncating mode parameter "
 +                        "data from %d to %d bytes\n", len, SD_BUF_SIZE);
 +              len = SD_BUF_SIZE;
 +      }
  
        /* Get the data */
        res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr);
        if (scsi_status_is_good(res)) {
                int offset = data.header_length + data.block_descriptor_length;
  
 -              if (offset >= SD_BUF_SIZE - 2) {
 -                      sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n");
 -                      goto defaults;
 +              while (offset < len) {
 +                      u8 page_code = buffer[offset] & 0x3F;
 +                      u8 spf       = buffer[offset] & 0x40;
 +
 +                      if (page_code == 8 || page_code == 6) {
 +                              /* We're interested only in the first 3 bytes.
 +                               */
 +                              if (len - offset <= 2) {
 +                                      sd_printk(KERN_ERR, sdkp, "Incomplete "
 +                                                "mode parameter data\n");
 +                                      goto defaults;
 +                              } else {
 +                                      modepage = page_code;
 +                                      goto Page_found;
 +                              }
 +                      } else {
 +                              /* Go to the next page */
 +                              if (spf && len - offset > 3)
 +                                      offset += 4 + (buffer[offset+2] << 8) +
 +                                              buffer[offset+3];
 +                              else if (!spf && len - offset > 1)
 +                                      offset += 2 + buffer[offset+1];
 +                              else {
 +                                      sd_printk(KERN_ERR, sdkp, "Incomplete "
 +                                                "mode parameter data\n");
 +                                      goto defaults;
 +                              }
 +                      }
                }
  
 -              if ((buffer[offset] & 0x3f) != modepage) {
 +              if (modepage == 0x3F) {
 +                      sd_printk(KERN_ERR, sdkp, "No Caching mode page "
 +                                "present\n");
 +                      goto defaults;
 +              } else if ((buffer[offset] & 0x3f) != modepage) {
                        sd_printk(KERN_ERR, sdkp, "Got wrong page\n");
                        goto defaults;
                }
 -
 +      Page_found:
                if (modepage == 8) {
                        sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
                        sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0);
diff --combined fs/block_dev.c
@@@ -11,6 -11,7 +11,6 @@@
  #include <linux/slab.h>
  #include <linux/kmod.h>
  #include <linux/major.h>
 -#include <linux/smp_lock.h>
  #include <linux/device_cgroup.h>
  #include <linux/highmem.h>
  #include <linux/blkdev.h>
@@@ -409,20 -410,13 +409,20 @@@ static struct inode *bdev_alloc_inode(s
        return &ei->vfs_inode;
  }
  
 -static void bdev_destroy_inode(struct inode *inode)
 +static void bdev_i_callback(struct rcu_head *head)
  {
 +      struct inode *inode = container_of(head, struct inode, i_rcu);
        struct bdev_inode *bdi = BDEV_I(inode);
  
 +      INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(bdev_cachep, bdi);
  }
  
 +static void bdev_destroy_inode(struct inode *inode)
 +{
 +      call_rcu(&inode->i_rcu, bdev_i_callback);
 +}
 +
  static void init_once(void *foo)
  {
        struct bdev_inode *ei = (struct bdev_inode *) foo;
        mutex_init(&bdev->bd_mutex);
        INIT_LIST_HEAD(&bdev->bd_inodes);
        INIT_LIST_HEAD(&bdev->bd_list);
- #ifdef CONFIG_SYSFS
-       INIT_LIST_HEAD(&bdev->bd_holder_list);
- #endif
        inode_init_once(&ei->vfs_inode);
        /* Initialize mutex for freeze. */
        mutex_init(&bdev->bd_fsfreeze_mutex);
@@@ -473,7 -464,7 +470,7 @@@ static const struct super_operations bd
  static struct dentry *bd_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
  {
 -      return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
 +      return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
  }
  
  static struct file_system_type bd_type = {
@@@ -669,7 -660,7 +666,7 @@@ static bool bd_may_claim(struct block_d
        else if (bdev->bd_contains == bdev)
                return true;     /* is a whole device which isn't held */
  
-       else if (whole->bd_holder == bd_claim)
+       else if (whole->bd_holder == bd_may_claim)
                return true;     /* is a partition of a device that is being partitioned */
        else if (whole->bd_holder != NULL)
                return false;    /* is a partition of a held device */
@@@ -781,439 -772,87 +778,87 @@@ static struct block_device *bd_start_cl
        }
  }
  
- /* releases bdev_lock */
- static void __bd_abort_claiming(struct block_device *whole, void *holder)
- {
-       BUG_ON(whole->bd_claiming != holder);
-       whole->bd_claiming = NULL;
-       wake_up_bit(&whole->bd_claiming, 0);
-       spin_unlock(&bdev_lock);
-       bdput(whole);
- }
- /**
-  * bd_abort_claiming - abort claiming a block device
-  * @whole: whole block device returned by bd_start_claiming()
-  * @holder: holder trying to claim @bdev
-  *
-  * Abort a claiming block started by bd_start_claiming().  Note that
-  * @whole is not the block device to be claimed but the whole device
-  * returned by bd_start_claiming().
-  *
-  * CONTEXT:
-  * Grabs and releases bdev_lock.
-  */
- static void bd_abort_claiming(struct block_device *whole, void *holder)
- {
-       spin_lock(&bdev_lock);
-       __bd_abort_claiming(whole, holder);             /* releases bdev_lock */
- }
- /* increment holders when we have a legitimate claim. requires bdev_lock */
- static void __bd_claim(struct block_device *bdev, struct block_device *whole,
-                                       void *holder)
- {
-       /* note that for a whole device bd_holders
-        * will be incremented twice, and bd_holder will
-        * be set to bd_claim before being set to holder
-        */
-       whole->bd_holders++;
-       whole->bd_holder = bd_claim;
-       bdev->bd_holders++;
-       bdev->bd_holder = holder;
- }
- /**
-  * bd_finish_claiming - finish claiming a block device
-  * @bdev: block device of interest (passed to bd_start_claiming())
-  * @whole: whole block device returned by bd_start_claiming()
-  * @holder: holder trying to claim @bdev
-  *
-  * Finish a claiming block started by bd_start_claiming().
-  *
-  * CONTEXT:
-  * Grabs and releases bdev_lock.
-  */
- static void bd_finish_claiming(struct block_device *bdev,
-                               struct block_device *whole, void *holder)
- {
-       spin_lock(&bdev_lock);
-       BUG_ON(!bd_may_claim(bdev, whole, holder));
-       __bd_claim(bdev, whole, holder);
-       __bd_abort_claiming(whole, holder); /* not actually an abort */
- }
- /**
-  * bd_claim - claim a block device
-  * @bdev: block device to claim
-  * @holder: holder trying to claim @bdev
-  *
-  * Try to claim @bdev which must have been opened successfully.
-  *
-  * CONTEXT:
-  * Might sleep.
-  *
-  * RETURNS:
-  * 0 if successful, -EBUSY if @bdev is already claimed.
-  */
- int bd_claim(struct block_device *bdev, void *holder)
- {
-       struct block_device *whole = bdev->bd_contains;
-       int res;
-       might_sleep();
-       spin_lock(&bdev_lock);
-       res = bd_prepare_to_claim(bdev, whole, holder);
-       if (res == 0)
-               __bd_claim(bdev, whole, holder);
-       spin_unlock(&bdev_lock);
-       return res;
- }
- EXPORT_SYMBOL(bd_claim);
- void bd_release(struct block_device *bdev)
- {
-       spin_lock(&bdev_lock);
-       if (!--bdev->bd_contains->bd_holders)
-               bdev->bd_contains->bd_holder = NULL;
-       if (!--bdev->bd_holders)
-               bdev->bd_holder = NULL;
-       spin_unlock(&bdev_lock);
- }
- EXPORT_SYMBOL(bd_release);
  #ifdef CONFIG_SYSFS
- /*
-  * Functions for bd_claim_by_kobject / bd_release_from_kobject
-  *
-  *     If a kobject is passed to bd_claim_by_kobject()
-  *     and the kobject has a parent directory,
-  *     following symlinks are created:
-  *        o from the kobject to the claimed bdev
-  *        o from "holders" directory of the bdev to the parent of the kobject
-  *     bd_release_from_kobject() removes these symlinks.
-  *
-  *     Example:
-  *        If /dev/dm-0 maps to /dev/sda, kobject corresponding to
-  *        /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
-  *           /sys/block/dm-0/slaves/sda --> /sys/block/sda
-  *           /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
-  */
  static int add_symlink(struct kobject *from, struct kobject *to)
  {
-       if (!from || !to)
-               return 0;
        return sysfs_create_link(from, to, kobject_name(to));
  }
  
  static void del_symlink(struct kobject *from, struct kobject *to)
  {
-       if (!from || !to)
-               return;
        sysfs_remove_link(from, kobject_name(to));
  }
  
- /*
-  * 'struct bd_holder' contains pointers to kobjects symlinked by
-  * bd_claim_by_kobject.
-  * It's connected to bd_holder_list which is protected by bdev->bd_sem.
-  */
- struct bd_holder {
-       struct list_head list;  /* chain of holders of the bdev */
-       int count;              /* references from the holder */
-       struct kobject *sdir;   /* holder object, e.g. "/block/dm-0/slaves" */
-       struct kobject *hdev;   /* e.g. "/block/dm-0" */
-       struct kobject *hdir;   /* e.g. "/block/sda/holders" */
-       struct kobject *sdev;   /* e.g. "/block/sda" */
- };
- /*
-  * Get references of related kobjects at once.
-  * Returns 1 on success. 0 on failure.
-  *
-  * Should call bd_holder_release_dirs() after successful use.
-  */
- static int bd_holder_grab_dirs(struct block_device *bdev,
-                       struct bd_holder *bo)
- {
-       if (!bdev || !bo)
-               return 0;
-       bo->sdir = kobject_get(bo->sdir);
-       if (!bo->sdir)
-               return 0;
-       bo->hdev = kobject_get(bo->sdir->parent);
-       if (!bo->hdev)
-               goto fail_put_sdir;
-       bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
-       if (!bo->sdev)
-               goto fail_put_hdev;
-       bo->hdir = kobject_get(bdev->bd_part->holder_dir);
-       if (!bo->hdir)
-               goto fail_put_sdev;
-       return 1;
- fail_put_sdev:
-       kobject_put(bo->sdev);
- fail_put_hdev:
-       kobject_put(bo->hdev);
- fail_put_sdir:
-       kobject_put(bo->sdir);
-       return 0;
- }
- /* Put references of related kobjects at once. */
- static void bd_holder_release_dirs(struct bd_holder *bo)
- {
-       kobject_put(bo->hdir);
-       kobject_put(bo->sdev);
-       kobject_put(bo->hdev);
-       kobject_put(bo->sdir);
- }
- static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
- {
-       struct bd_holder *bo;
-       bo = kzalloc(sizeof(*bo), GFP_KERNEL);
-       if (!bo)
-               return NULL;
-       bo->count = 1;
-       bo->sdir = kobj;
-       return bo;
- }
- static void free_bd_holder(struct bd_holder *bo)
- {
-       kfree(bo);
- }
  /**
-  * find_bd_holder - find matching struct bd_holder from the block device
+  * bd_link_disk_holder - create symlinks between holding disk and slave bdev
+  * @bdev: the claimed slave bdev
+  * @disk: the holding disk
   *
-  * @bdev:     struct block device to be searched
-  * @bo:               target struct bd_holder
-  *
-  * Returns matching entry with @bo in @bdev->bd_holder_list.
-  * If found, increment the reference count and return the pointer.
-  * If not found, returns NULL.
-  */
- static struct bd_holder *find_bd_holder(struct block_device *bdev,
-                                       struct bd_holder *bo)
- {
-       struct bd_holder *tmp;
-       list_for_each_entry(tmp, &bdev->bd_holder_list, list)
-               if (tmp->sdir == bo->sdir) {
-                       tmp->count++;
-                       return tmp;
-               }
-       return NULL;
- }
- /**
-  * add_bd_holder - create sysfs symlinks for bd_claim() relationship
-  *
-  * @bdev:     block device to be bd_claimed
-  * @bo:               preallocated and initialized by alloc_bd_holder()
-  *
-  * Add @bo to @bdev->bd_holder_list, create symlinks.
-  *
-  * Returns 0 if symlinks are created.
-  * Returns -ve if something fails.
-  */
- static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
- {
-       int err;
-       if (!bo)
-               return -EINVAL;
-       if (!bd_holder_grab_dirs(bdev, bo))
-               return -EBUSY;
-       err = add_symlink(bo->sdir, bo->sdev);
-       if (err)
-               return err;
-       err = add_symlink(bo->hdir, bo->hdev);
-       if (err) {
-               del_symlink(bo->sdir, bo->sdev);
-               return err;
-       }
-       list_add_tail(&bo->list, &bdev->bd_holder_list);
-       return 0;
- }
- /**
-  * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
+  * This function creates the following sysfs symlinks.
   *
-  * @bdev:     block device to be bd_claimed
-  * @kobj:     holder's kobject
+  * - from "slaves" directory of the holder @disk to the claimed @bdev
+  * - from "holders" directory of the @bdev to the holder @disk
   *
-  * If there is matching entry with @kobj in @bdev->bd_holder_list
-  * and no other bd_claim() from the same kobject,
-  * remove the struct bd_holder from the list, delete symlinks for it.
+  * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
+  * passed to bd_link_disk_holder(), then:
   *
-  * Returns a pointer to the struct bd_holder when it's removed from the list
-  * and ready to be freed.
-  * Returns NULL if matching claim isn't found or there is other bd_claim()
-  * by the same kobject.
-  */
- static struct bd_holder *del_bd_holder(struct block_device *bdev,
-                                       struct kobject *kobj)
- {
-       struct bd_holder *bo;
-       list_for_each_entry(bo, &bdev->bd_holder_list, list) {
-               if (bo->sdir == kobj) {
-                       bo->count--;
-                       BUG_ON(bo->count < 0);
-                       if (!bo->count) {
-                               list_del(&bo->list);
-                               del_symlink(bo->sdir, bo->sdev);
-                               del_symlink(bo->hdir, bo->hdev);
-                               bd_holder_release_dirs(bo);
-                               return bo;
-                       }
-                       break;
-               }
-       }
-       return NULL;
- }
- /**
-  * bd_claim_by_kobject - bd_claim() with additional kobject signature
+  *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
+  *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
   *
-  * @bdev:     block device to be claimed
-  * @holder:   holder's signature
-  * @kobj:     holder's kobject
+  * The caller must have claimed @bdev before calling this function and
+  * ensure that both @bdev and @disk are valid during the creation and
+  * lifetime of these symlinks.
   *
-  * Do bd_claim() and if it succeeds, create sysfs symlinks between
-  * the bdev and the holder's kobject.
-  * Use bd_release_from_kobject() when relesing the claimed bdev.
+  * CONTEXT:
+  * Might sleep.
   *
-  * Returns 0 on success. (same as bd_claim())
-  * Returns errno on failure.
+  * RETURNS:
+  * 0 on success, -errno on failure.
   */
- static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
-                               struct kobject *kobj)
+ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
  {
-       int err;
-       struct bd_holder *bo, *found;
-       if (!kobj)
-               return -EINVAL;
-       bo = alloc_bd_holder(kobj);
-       if (!bo)
-               return -ENOMEM;
+       int ret = 0;
  
        mutex_lock(&bdev->bd_mutex);
  
-       err = bd_claim(bdev, holder);
-       if (err)
-               goto fail;
+       WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
  
-       found = find_bd_holder(bdev, bo);
-       if (found)
-               goto fail;
+       /* FIXME: remove the following once add_disk() handles errors */
+       if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
+               goto out_unlock;
  
-       err = add_bd_holder(bdev, bo);
-       if (err)
-               bd_release(bdev);
-       else
-               bo = NULL;
- fail:
-       mutex_unlock(&bdev->bd_mutex);
-       free_bd_holder(bo);
-       return err;
- }
+       ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+       if (ret)
+               goto out_unlock;
  
- /**
-  * bd_release_from_kobject - bd_release() with additional kobject signature
-  *
-  * @bdev:     block device to be released
-  * @kobj:     holder's kobject
-  *
-  * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
-  */
- static void bd_release_from_kobject(struct block_device *bdev,
-                                       struct kobject *kobj)
- {
-       if (!kobj)
-               return;
+       ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+       if (ret) {
+               del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+               goto out_unlock;
+       }
  
-       mutex_lock(&bdev->bd_mutex);
-       bd_release(bdev);
-       free_bd_holder(del_bd_holder(bdev, kobj));
+       bdev->bd_holder_disk = disk;
+ out_unlock:
        mutex_unlock(&bdev->bd_mutex);
+       return ret;
  }
+ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
  
- /**
-  * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
-  *
-  * @bdev:     block device to be claimed
-  * @holder:   holder's signature
-  * @disk:     holder's gendisk
-  *
-  * Call bd_claim_by_kobject() with getting @disk->slave_dir.
-  */
- int bd_claim_by_disk(struct block_device *bdev, void *holder,
-                       struct gendisk *disk)
+ static void bd_unlink_disk_holder(struct block_device *bdev)
  {
-       return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
- }
- EXPORT_SYMBOL_GPL(bd_claim_by_disk);
+       struct gendisk *disk = bdev->bd_holder_disk;
  
- /**
-  * bd_release_from_disk - wrapper function for bd_release_from_kobject()
-  *
-  * @bdev:     block device to be claimed
-  * @disk:     holder's gendisk
-  *
-  * Call bd_release_from_kobject() and put @disk->slave_dir.
-  */
- void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
- {
-       bd_release_from_kobject(bdev, disk->slave_dir);
-       kobject_put(disk->slave_dir);
- }
- EXPORT_SYMBOL_GPL(bd_release_from_disk);
- #endif
+       bdev->bd_holder_disk = NULL;
+       if (!disk)
+               return;
  
- /*
-  * Tries to open block device by device number.  Use it ONLY if you
-  * really do not have anything better - i.e. when you are behind a
-  * truly sucky interface and all you are given is a device number.  _Never_
-  * to be used for internal purposes.  If you ever need it - reconsider
-  * your API.
-  */
- struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
- {
-       struct block_device *bdev = bdget(dev);
-       int err = -ENOMEM;
-       if (bdev)
-               err = blkdev_get(bdev, mode);
-       return err ? ERR_PTR(err) : bdev;
+       del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+       del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
  }
- EXPORT_SYMBOL(open_by_devnum);
+ #else
+ static inline void bd_unlink_disk_holder(struct block_device *bdev)
+ { }
+ #endif
  
  /**
   * flush_disk - invalidates all buffer-cache entries on a disk
@@@ -1309,10 -948,11 +954,11 @@@ int check_disk_change(struct block_devi
  {
        struct gendisk *disk = bdev->bd_disk;
        const struct block_device_operations *bdops = disk->fops;
+       unsigned int events;
  
-       if (!bdops->media_changed)
-               return 0;
-       if (!bdops->media_changed(bdev->bd_disk))
+       events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
+                                  DISK_EVENT_EJECT_REQUEST);
+       if (!(events & DISK_EVENT_MEDIA_CHANGE))
                return 0;
  
        flush_disk(bdev);
@@@ -1475,17 -1115,171 +1121,171 @@@ static int __blkdev_get(struct block_de
        return ret;
  }
  
- int blkdev_get(struct block_device *bdev, fmode_t mode)
+ /**
+  * blkdev_get - open a block device
+  * @bdev: block_device to open
+  * @mode: FMODE_* mask
+  * @holder: exclusive holder identifier
+  *
+  * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
+  * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
+  * @holder is invalid.  Exclusive opens may nest for the same @holder.
+  *
+  * On success, the reference count of @bdev is unchanged.  On failure,
+  * @bdev is put.
+  *
+  * CONTEXT:
+  * Might sleep.
+  *
+  * RETURNS:
+  * 0 on success, -errno on failure.
+  */
+ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
  {
-       return __blkdev_get(bdev, mode, 0);
+       struct block_device *whole = NULL;
+       int res;
+       WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
+       if ((mode & FMODE_EXCL) && holder) {
+               whole = bd_start_claiming(bdev, holder);
+               if (IS_ERR(whole)) {
+                       bdput(bdev);
+                       return PTR_ERR(whole);
+               }
+       }
+       res = __blkdev_get(bdev, mode, 0);
+       /* __blkdev_get() may alter read only status, check it afterwards */
+       if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+               __blkdev_put(bdev, mode, 0);
+               res = -EACCES;
+       }
+       if (whole) {
+               /* finish claiming */
+               mutex_lock(&bdev->bd_mutex);
+               spin_lock(&bdev_lock);
+               if (!res) {
+                       BUG_ON(!bd_may_claim(bdev, whole, holder));
+                       /*
+                        * Note that for a whole device bd_holders
+                        * will be incremented twice, and bd_holder
+                        * will be set to bd_may_claim before being
+                        * set to holder
+                        */
+                       whole->bd_holders++;
+                       whole->bd_holder = bd_may_claim;
+                       bdev->bd_holders++;
+                       bdev->bd_holder = holder;
+               }
+               /* tell others that we're done */
+               BUG_ON(whole->bd_claiming != holder);
+               whole->bd_claiming = NULL;
+               wake_up_bit(&whole->bd_claiming, 0);
+               spin_unlock(&bdev_lock);
+               /*
+                * Block event polling for write claims.  Any write
+                * holder makes the write_holder state stick until all
+                * are released.  This is good enough and tracking
+                * individual writeable reference is too fragile given
+                * the way @mode is used in blkdev_get/put().
+                */
+               if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+                       bdev->bd_write_holder = true;
+                       disk_block_events(bdev->bd_disk);
+               }
+               mutex_unlock(&bdev->bd_mutex);
+               bdput(whole);
+       }
+       return res;
  }
  EXPORT_SYMBOL(blkdev_get);
  
+ /**
+  * blkdev_get_by_path - open a block device by name
+  * @path: path to the block device to open
+  * @mode: FMODE_* mask
+  * @holder: exclusive holder identifier
+  *
+  * Open the blockdevice described by the device file at @path.  @mode
+  * and @holder are identical to blkdev_get().
+  *
+  * On success, the returned block_device has reference count of one.
+  *
+  * CONTEXT:
+  * Might sleep.
+  *
+  * RETURNS:
+  * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+  */
+ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+                                       void *holder)
+ {
+       struct block_device *bdev;
+       int err;
+       bdev = lookup_bdev(path);
+       if (IS_ERR(bdev))
+               return bdev;
+       err = blkdev_get(bdev, mode, holder);
+       if (err)
+               return ERR_PTR(err);
+       return bdev;
+ }
+ EXPORT_SYMBOL(blkdev_get_by_path);
+ /**
+  * blkdev_get_by_dev - open a block device by device number
+  * @dev: device number of block device to open
+  * @mode: FMODE_* mask
+  * @holder: exclusive holder identifier
+  *
+  * Open the blockdevice described by device number @dev.  @mode and
+  * @holder are identical to blkdev_get().
+  *
+  * Use it ONLY if you really do not have anything better - i.e. when
+  * you are behind a truly sucky interface and all you are given is a
+  * device number.  _Never_ to be used for internal purposes.  If you
+  * ever need it - reconsider your API.
+  *
+  * On success, the returned block_device has reference count of one.
+  *
+  * CONTEXT:
+  * Might sleep.
+  *
+  * RETURNS:
+  * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+  */
+ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+ {
+       struct block_device *bdev;
+       int err;
+       bdev = bdget(dev);
+       if (!bdev)
+               return ERR_PTR(-ENOMEM);
+       err = blkdev_get(bdev, mode, holder);
+       if (err)
+               return ERR_PTR(err);
+       return bdev;
+ }
+ EXPORT_SYMBOL(blkdev_get_by_dev);
  static int blkdev_open(struct inode * inode, struct file * filp)
  {
-       struct block_device *whole = NULL;
        struct block_device *bdev;
-       int res;
  
        /*
         * Preserve backwards compatibility and allow large file access
        if (bdev == NULL)
                return -ENOMEM;
  
-       if (filp->f_mode & FMODE_EXCL) {
-               whole = bd_start_claiming(bdev, filp);
-               if (IS_ERR(whole)) {
-                       bdput(bdev);
-                       return PTR_ERR(whole);
-               }
-       }
        filp->f_mapping = bdev->bd_inode->i_mapping;
  
-       res = blkdev_get(bdev, filp->f_mode);
-       if (whole) {
-               if (res == 0)
-                       bd_finish_claiming(bdev, whole, filp);
-               else
-                       bd_abort_claiming(whole, filp);
-       }
-       return res;
+       return blkdev_get(bdev, filp->f_mode, filp);
  }
  
  static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                bdev->bd_part_count--;
  
        if (!--bdev->bd_openers) {
+               WARN_ON_ONCE(bdev->bd_holders);
                sync_blockdev(bdev);
                kill_bdev(bdev);
        }
  
  int blkdev_put(struct block_device *bdev, fmode_t mode)
  {
+       if (mode & FMODE_EXCL) {
+               bool bdev_free;
+               /*
+                * Release a claim on the device.  The holder fields
+                * are protected with bdev_lock.  bd_mutex is to
+                * synchronize disk_holder unlinking.
+                */
+               mutex_lock(&bdev->bd_mutex);
+               spin_lock(&bdev_lock);
+               WARN_ON_ONCE(--bdev->bd_holders < 0);
+               WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
+               /* bd_contains might point to self, check in a separate step */
+               if ((bdev_free = !bdev->bd_holders))
+                       bdev->bd_holder = NULL;
+               if (!bdev->bd_contains->bd_holders)
+                       bdev->bd_contains->bd_holder = NULL;
+               spin_unlock(&bdev_lock);
+               /*
+                * If this was the last claim, remove holder link and
+                * unblock evpoll if it was a write holder.
+                */
+               if (bdev_free) {
+                       bd_unlink_disk_holder(bdev);
+                       if (bdev->bd_write_holder) {
+                               disk_unblock_events(bdev->bd_disk);
+                               bdev->bd_write_holder = false;
+                       } else
+                               disk_check_events(bdev->bd_disk);
+               }
+               mutex_unlock(&bdev->bd_mutex);
+       } else
+               disk_check_events(bdev->bd_disk);
        return __blkdev_put(bdev, mode, 0);
  }
  EXPORT_SYMBOL(blkdev_put);
  static int blkdev_close(struct inode * inode, struct file * filp)
  {
        struct block_device *bdev = I_BDEV(filp->f_mapping->host);
-       if (bdev->bd_holder == filp)
-               bd_release(bdev);
        return blkdev_put(bdev, filp->f_mode);
  }
  
@@@ -1722,67 -1538,6 +1544,6 @@@ fail
  }
  EXPORT_SYMBOL(lookup_bdev);
  
- /**
-  * open_bdev_exclusive  -  open a block device by name and set it up for use
-  *
-  * @path:     special file representing the block device
-  * @mode:     FMODE_... combination to pass be used
-  * @holder:   owner for exclusion
-  *
-  * Open the blockdevice described by the special file at @path, claim it
-  * for the @holder.
-  */
- struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
- {
-       struct block_device *bdev, *whole;
-       int error;
-       bdev = lookup_bdev(path);
-       if (IS_ERR(bdev))
-               return bdev;
-       whole = bd_start_claiming(bdev, holder);
-       if (IS_ERR(whole)) {
-               bdput(bdev);
-               return whole;
-       }
-       error = blkdev_get(bdev, mode);
-       if (error)
-               goto out_abort_claiming;
-       error = -EACCES;
-       if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
-               goto out_blkdev_put;
-       bd_finish_claiming(bdev, whole, holder);
-       return bdev;
- out_blkdev_put:
-       blkdev_put(bdev, mode);
- out_abort_claiming:
-       bd_abort_claiming(whole, holder);
-       return ERR_PTR(error);
- }
- EXPORT_SYMBOL(open_bdev_exclusive);
- /**
-  * close_bdev_exclusive  -  close a blockdevice opened by open_bdev_exclusive()
-  *
-  * @bdev:     blockdevice to close
-  * @mode:     mode, must match that used to open.
-  *
-  * This is the counterpart to open_bdev_exclusive().
-  */
- void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
- {
-       bd_release(bdev);
-       blkdev_put(bdev, mode);
- }
- EXPORT_SYMBOL(close_bdev_exclusive);
  int __invalidate_device(struct block_device *bdev)
  {
        struct super_block *sb = get_super(bdev);
diff --combined fs/btrfs/volumes.c
@@@ -412,16 -412,12 +412,16 @@@ static noinline int device_list_add(con
  
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
 -      } else if (strcmp(device->name, path)) {
 +      } else if (!device->name || strcmp(device->name, path)) {
                name = kstrdup(path, GFP_NOFS);
                if (!name)
                        return -ENOMEM;
                kfree(device->name);
                device->name = name;
 +              if (device->missing) {
 +                      fs_devices->missing_devices--;
 +                      device->missing = 0;
 +              }
        }
  
        if (found_transid > fs_devices->latest_trans) {
@@@ -493,7 -489,7 +493,7 @@@ again
                        continue;
  
                if (device->bdev) {
-                       close_bdev_exclusive(device->bdev, device->mode);
+                       blkdev_put(device->bdev, device->mode);
                        device->bdev = NULL;
                        fs_devices->open_devices--;
                }
@@@ -527,7 -523,7 +527,7 @@@ static int __btrfs_close_devices(struc
  
        list_for_each_entry(device, &fs_devices->devices, dev_list) {
                if (device->bdev) {
-                       close_bdev_exclusive(device->bdev, device->mode);
+                       blkdev_put(device->bdev, device->mode);
                        fs_devices->open_devices--;
                }
                if (device->writeable) {
@@@ -584,13 -580,15 +584,15 @@@ static int __btrfs_open_devices(struct 
        int seeding = 1;
        int ret = 0;
  
+       flags |= FMODE_EXCL;
        list_for_each_entry(device, head, dev_list) {
                if (device->bdev)
                        continue;
                if (!device->name)
                        continue;
  
-               bdev = open_bdev_exclusive(device->name, flags, holder);
+               bdev = blkdev_get_by_path(device->name, flags, holder);
                if (IS_ERR(bdev)) {
                        printk(KERN_INFO "open %s failed\n", device->name);
                        goto error;
  error_brelse:
                brelse(bh);
  error_close:
-               close_bdev_exclusive(bdev, FMODE_READ);
+               blkdev_put(bdev, flags);
  error:
                continue;
        }
@@@ -688,7 -686,8 +690,8 @@@ int btrfs_scan_one_device(const char *p
  
        mutex_lock(&uuid_mutex);
  
-       bdev = open_bdev_exclusive(path, flags, holder);
+       flags |= FMODE_EXCL;
+       bdev = blkdev_get_by_path(path, flags, holder);
  
        if (IS_ERR(bdev)) {
                ret = PTR_ERR(bdev);
  
        brelse(bh);
  error_close:
-       close_bdev_exclusive(bdev, flags);
+       blkdev_put(bdev, flags);
  error:
        mutex_unlock(&uuid_mutex);
        return ret;
@@@ -1183,8 -1182,8 +1186,8 @@@ int btrfs_rm_device(struct btrfs_root *
                        goto out;
                }
        } else {
-               bdev = open_bdev_exclusive(device_path, FMODE_READ,
-                                     root->fs_info->bdev_holder);
+               bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
+                                         root->fs_info->bdev_holder);
                if (IS_ERR(bdev)) {
                        ret = PTR_ERR(bdev);
                        goto out;
  
        device->fs_devices->num_devices--;
  
 +      if (device->missing)
 +              root->fs_info->fs_devices->missing_devices--;
 +
        next_device = list_entry(root->fs_info->fs_devices->devices.next,
                                 struct btrfs_device, dev_list);
        if (device->bdev == root->fs_info->sb->s_bdev)
                root->fs_info->fs_devices->latest_bdev = next_device->bdev;
  
        if (device->bdev) {
-               close_bdev_exclusive(device->bdev, device->mode);
+               blkdev_put(device->bdev, device->mode);
                device->bdev = NULL;
                device->fs_devices->open_devices--;
        }
@@@ -1294,7 -1290,7 +1297,7 @@@ error_brelse
        brelse(bh);
  error_close:
        if (bdev)
-               close_bdev_exclusive(bdev, FMODE_READ);
+               blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
  out:
        mutex_unlock(&root->fs_info->volume_mutex);
        mutex_unlock(&uuid_mutex);
@@@ -1446,7 -1442,8 +1449,8 @@@ int btrfs_init_new_device(struct btrfs_
        if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
                return -EINVAL;
  
-       bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
+       bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
+                                 root->fs_info->bdev_holder);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);
  
@@@ -1572,7 -1569,7 +1576,7 @@@ out
        mutex_unlock(&root->fs_info->volume_mutex);
        return ret;
  error:
-       close_bdev_exclusive(bdev, 0);
+       blkdev_put(bdev, FMODE_EXCL);
        if (seeding_dev) {
                mutex_unlock(&uuid_mutex);
                up_write(&sb->s_umount);
@@@ -3087,9 -3084,7 +3091,9 @@@ static struct btrfs_device *add_missing
        device->devid = devid;
        device->work.func = pending_bios_fn;
        device->fs_devices = fs_devices;
 +      device->missing = 1;
        fs_devices->num_devices++;
 +      fs_devices->missing_devices++;
        spin_lock_init(&device->io_lock);
        INIT_LIST_HEAD(&device->dev_alloc_list);
        memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@@ -3287,15 -3282,6 +3291,15 @@@ static int read_one_dev(struct btrfs_ro
                        device = add_missing_dev(root, devid, dev_uuid);
                        if (!device)
                                return -ENOMEM;
 +              } else if (!device->missing) {
 +                      /*
 +                       * this happens when a device that was properly setup
 +                       * in the device info lists suddenly goes bad.
 +                       * device->bdev is NULL, and so we have to set
 +                       * device->missing to one here
 +                       */
 +                      root->fs_info->fs_devices->missing_devices++;
 +                      device->missing = 1;
                }
        }
  
diff --combined fs/btrfs/volumes.h
@@@ -44,13 -44,12 +44,13 @@@ struct btrfs_device 
  
        int writeable;
        int in_fs_metadata;
 +      int missing;
  
        spinlock_t io_lock;
  
        struct block_device *bdev;
  
-       /* the mode sent to open_bdev_exclusive */
+       /* the mode sent to blkdev_get */
        fmode_t mode;
  
        char *name;
@@@ -94,7 -93,6 +94,7 @@@ struct btrfs_fs_devices 
        u64 num_devices;
        u64 open_devices;
        u64 rw_devices;
 +      u64 missing_devices;
        u64 total_rw_bytes;
        struct block_device *latest_bdev;
  
diff --combined fs/char_dev.c
@@@ -59,7 -59,7 +59,7 @@@ static struct char_device_struct 
  } *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
  
  /* index in the above */
- static inline int major_to_index(int major)
+ static inline int major_to_index(unsigned major)
  {
        return major % CHRDEV_MAJOR_HASH_SIZE;
  }
@@@ -417,6 -417,18 +417,6 @@@ static int chrdev_open(struct inode *in
        return ret;
  }
  
 -int cdev_index(struct inode *inode)
 -{
 -      int idx;
 -      struct kobject *kobj;
 -
 -      kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
 -      if (!kobj)
 -              return -1;
 -      kobject_put(kobj);
 -      return idx;
 -}
 -
  void cd_forget(struct inode *inode)
  {
        spin_lock(&cdev_lock);
@@@ -570,6 -582,7 +570,6 @@@ EXPORT_SYMBOL(cdev_init)
  EXPORT_SYMBOL(cdev_alloc);
  EXPORT_SYMBOL(cdev_del);
  EXPORT_SYMBOL(cdev_add);
 -EXPORT_SYMBOL(cdev_index);
  EXPORT_SYMBOL(__register_chrdev);
  EXPORT_SYMBOL(__unregister_chrdev);
  EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --combined fs/ext3/super.c
@@@ -27,6 -27,7 +27,6 @@@
  #include <linux/init.h>
  #include <linux/blkdev.h>
  #include <linux/parser.h>
 -#include <linux/smp_lock.h>
  #include <linux/buffer_head.h>
  #include <linux/exportfs.h>
  #include <linux/vfs.h>
@@@ -143,16 -144,12 +143,16 @@@ void ext3_journal_abort_handle(const ch
  void ext3_msg(struct super_block *sb, const char *prefix,
                const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
 -      vprintk(fmt, args);
 -      printk("\n");
 +
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +
 +      printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 +
        va_end(args);
  }
  
@@@ -199,20 -196,15 +199,20 @@@ static void ext3_handle_error(struct su
                        sb->s_id);
  }
  
 -void ext3_error (struct super_block * sb, const char * function,
 -               const char * fmt, ...)
 +void ext3_error(struct super_block *sb, const char *function,
 +              const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
 -      vprintk(fmt, args);
 -      printk("\n");
 +
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +
 +      printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
 +             sb->s_id, function, &vaf);
 +
        va_end(args);
  
        ext3_handle_error(sb);
@@@ -283,20 -275,15 +283,20 @@@ void __ext3_std_error (struct super_blo
   * case we take the easy way out and panic immediately.
   */
  
 -void ext3_abort (struct super_block * sb, const char * function,
 -               const char * fmt, ...)
 +void ext3_abort(struct super_block *sb, const char *function,
 +               const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
 -      vprintk(fmt, args);
 -      printk("\n");
 +
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +
 +      printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
 +             sb->s_id, function, &vaf);
 +
        va_end(args);
  
        if (test_opt(sb, ERRORS_PANIC))
                journal_abort(EXT3_SB(sb)->s_journal, -EIO);
  }
  
 -void ext3_warning (struct super_block * sb, const char * function,
 -                 const char * fmt, ...)
 +void ext3_warning(struct super_block *sb, const char *function,
 +                const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
 -             sb->s_id, function);
 -      vprintk(fmt, args);
 -      printk("\n");
 +
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +
 +      printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
 +             sb->s_id, function, &vaf);
 +
        va_end(args);
  }
  
@@@ -364,7 -347,7 +364,7 @@@ static struct block_device *ext3_blkdev
        struct block_device *bdev;
        char b[BDEVNAME_SIZE];
  
-       bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+       bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
        if (IS_ERR(bdev))
                goto fail;
        return bdev;
@@@ -381,8 -364,7 +381,7 @@@ fail
   */
  static int ext3_blkdev_put(struct block_device *bdev)
  {
-       bd_release(bdev);
-       return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+       return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  }
  
  static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
@@@ -497,13 -479,6 +496,13 @@@ static struct inode *ext3_alloc_inode(s
        return &ei->vfs_inode;
  }
  
 +static void ext3_i_callback(struct rcu_head *head)
 +{
 +      struct inode *inode = container_of(head, struct inode, i_rcu);
 +      INIT_LIST_HEAD(&inode->i_dentry);
 +      kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 +}
 +
  static void ext3_destroy_inode(struct inode *inode)
  {
        if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
                                false);
                dump_stack();
        }
 -      kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 +      call_rcu(&inode->i_rcu, ext3_i_callback);
  }
  
  static void init_once(void *foo)
@@@ -1866,15 -1841,13 +1865,15 @@@ static int ext3_fill_super (struct supe
                goto failed_mount;
        }
  
 -      if (generic_check_addressable(sb->s_blocksize_bits,
 -                                    le32_to_cpu(es->s_blocks_count))) {
 +      err = generic_check_addressable(sb->s_blocksize_bits,
 +                                      le32_to_cpu(es->s_blocks_count));
 +      if (err) {
                ext3_msg(sb, KERN_ERR,
                        "error: filesystem is too large to mount safely");
                if (sizeof(sector_t) < 8)
                        ext3_msg(sb, KERN_ERR,
                                "error: CONFIG_LBDAF not enabled");
 +              ret = err;
                goto failed_mount;
        }
  
@@@ -2162,13 -2135,6 +2161,6 @@@ static journal_t *ext3_get_dev_journal(
        if (bdev == NULL)
                return NULL;
  
-       if (bd_claim(bdev, sb)) {
-               ext3_msg(sb, KERN_ERR,
-                       "error: failed to claim external journal device");
-               blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
-               return NULL;
-       }
        blocksize = sb->s_blocksize;
        hblock = bdev_logical_block_size(bdev);
        if (blocksize < hblock) {
@@@ -2317,7 -2283,7 +2309,7 @@@ static int ext3_load_journal(struct sup
        EXT3_SB(sb)->s_journal = journal;
        ext3_clear_journal_err(sb, es);
  
 -      if (journal_devnum &&
 +      if (!really_read_only && journal_devnum &&
            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
                es->s_journal_dev = cpu_to_le32(journal_devnum);
  
diff --combined fs/ext4/super.c
@@@ -388,14 -388,13 +388,14 @@@ static void ext4_handle_error(struct su
  void __ext4_error(struct super_block *sb, const char *function,
                  unsigned int line, const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
 -             sb->s_id, function, line, current->comm);
 -      vprintk(fmt, args);
 -      printk("\n");
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +      printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 +             sb->s_id, function, line, current->comm, &vaf);
        va_end(args);
  
        ext4_handle_error(sb);
@@@ -406,31 -405,28 +406,31 @@@ void ext4_error_inode(struct inode *ino
                      const char *fmt, ...)
  {
        va_list args;
 +      struct va_format vaf;
        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
  
        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
        es->s_last_error_block = cpu_to_le64(block);
        save_error_info(inode->i_sb, function, line);
        va_start(args, fmt);
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
               inode->i_sb->s_id, function, line, inode->i_ino);
        if (block)
 -              printk("block %llu: ", block);
 -      printk("comm %s: ", current->comm);
 -      vprintk(fmt, args);
 -      printk("\n");
 +              printk(KERN_CONT "block %llu: ", block);
 +      printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
        va_end(args);
  
        ext4_handle_error(inode->i_sb);
  }
  
  void ext4_error_file(struct file *file, const char *function,
 -                   unsigned int line, const char *fmt, ...)
 +                   unsigned int line, ext4_fsblk_t block,
 +                   const char *fmt, ...)
  {
        va_list args;
 +      struct va_format vaf;
        struct ext4_super_block *es;
        struct inode *inode = file->f_dentry->d_inode;
        char pathname[80], *path;
        es = EXT4_SB(inode->i_sb)->s_es;
        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
        save_error_info(inode->i_sb, function, line);
 -      va_start(args, fmt);
        path = d_path(&(file->f_path), pathname, sizeof(pathname));
 -      if (!path)
 +      if (IS_ERR(path))
                path = "(unknown)";
        printk(KERN_CRIT
 -             "EXT4-fs error (device %s): %s:%d: inode #%lu "
 -             "(comm %s path %s): ",
 -             inode->i_sb->s_id, function, line, inode->i_ino,
 -             current->comm, path);
 -      vprintk(fmt, args);
 -      printk("\n");
 +             "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
 +             inode->i_sb->s_id, function, line, inode->i_ino);
 +      if (block)
 +              printk(KERN_CONT "block %llu: ", block);
 +      va_start(args, fmt);
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +      printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
        va_end(args);
  
        ext4_handle_error(inode->i_sb);
@@@ -548,29 -543,28 +548,29 @@@ void __ext4_abort(struct super_block *s
                panic("EXT4-fs panic from previous error\n");
  }
  
 -void ext4_msg (struct super_block * sb, const char *prefix,
 -                 const char *fmt, ...)
 +void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
 -      vprintk(fmt, args);
 -      printk("\n");
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +      printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
        va_end(args);
  }
  
  void __ext4_warning(struct super_block *sb, const char *function,
                    unsigned int line, const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
 -             sb->s_id, function, line);
 -      vprintk(fmt, args);
 -      printk("\n");
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +      printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 +             sb->s_id, function, line, &vaf);
        va_end(args);
  }
  
@@@ -581,25 -575,21 +581,25 @@@ void __ext4_grp_locked_error(const cha
  __releases(bitlock)
  __acquires(bitlock)
  {
 +      struct va_format vaf;
        va_list args;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  
        es->s_last_error_ino = cpu_to_le32(ino);
        es->s_last_error_block = cpu_to_le64(block);
        __save_error_info(sb, function, line);
 +
        va_start(args, fmt);
 +
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
               sb->s_id, function, line, grp);
        if (ino)
 -              printk("inode %lu: ", ino);
 +              printk(KERN_CONT "inode %lu: ", ino);
        if (block)
 -              printk("block %llu:", (unsigned long long) block);
 -      vprintk(fmt, args);
 -      printk("\n");
 +              printk(KERN_CONT "block %llu:", (unsigned long long) block);
 +      printk(KERN_CONT "%pV\n", &vaf);
        va_end(args);
  
        if (test_opt(sb, ERRORS_CONT)) {
@@@ -657,7 -647,7 +657,7 @@@ static struct block_device *ext4_blkdev
        struct block_device *bdev;
        char b[BDEVNAME_SIZE];
  
-       bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+       bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
        if (IS_ERR(bdev))
                goto fail;
        return bdev;
@@@ -673,8 -663,7 +673,7 @@@ fail
   */
  static int ext4_blkdev_put(struct block_device *bdev)
  {
-       bd_release(bdev);
-       return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+       return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  }
  
  static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
@@@ -818,15 -807,21 +817,15 @@@ static struct inode *ext4_alloc_inode(s
        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        spin_lock_init(&ei->i_prealloc_lock);
 -      /*
 -       * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
 -       * therefore it can be null here.  Don't check it, just initialize
 -       * jinode.
 -       */
 -      jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
        ei->i_reserved_data_blocks = 0;
        ei->i_reserved_meta_blocks = 0;
        ei->i_allocated_meta_blocks = 0;
        ei->i_da_metadata_calc_len = 0;
 -      ei->i_delalloc_reserved_flag = 0;
        spin_lock_init(&(ei->i_block_reservation_lock));
  #ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
  #endif
 +      ei->jinode = NULL;
        INIT_LIST_HEAD(&ei->i_completed_io_list);
        spin_lock_init(&ei->i_completed_io_lock);
        ei->cur_aio_dio = NULL;
@@@ -845,13 -840,6 +844,13 @@@ static int ext4_drop_inode(struct inod
        return drop;
  }
  
 +static void ext4_i_callback(struct rcu_head *head)
 +{
 +      struct inode *inode = container_of(head, struct inode, i_rcu);
 +      INIT_LIST_HEAD(&inode->i_dentry);
 +      kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 +}
 +
  static void ext4_destroy_inode(struct inode *inode)
  {
        ext4_ioend_wait(inode);
                                true);
                dump_stack();
        }
 -      kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 +      call_rcu(&inode->i_rcu, ext4_i_callback);
  }
  
  static void init_once(void *foo)
@@@ -902,12 -890,9 +901,12 @@@ void ext4_clear_inode(struct inode *ino
        end_writeback(inode);
        dquot_drop(inode);
        ext4_discard_preallocations(inode);
 -      if (EXT4_JOURNAL(inode))
 -              jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
 -                                     &EXT4_I(inode)->jinode);
 +      if (EXT4_I(inode)->jinode) {
 +              jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 +                                             EXT4_I(inode)->jinode);
 +              jbd2_free_inode(EXT4_I(inode)->jinode);
 +              EXT4_I(inode)->jinode = NULL;
 +      }
  }
  
  static inline void ext4_show_quota_options(struct seq_file *seq,
@@@ -1040,8 -1025,6 +1039,8 @@@ static int ext4_show_options(struct seq
            !(def_mount_opts & EXT4_DEFM_NODELALLOC))
                seq_puts(seq, ",nodelalloc");
  
 +      if (test_opt(sb, MBLK_IO_SUBMIT))
 +              seq_puts(seq, ",mblk_io_submit");
        if (sbi->s_stripe)
                seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
        /*
@@@ -1213,6 -1196,7 +1212,6 @@@ static const struct super_operations ex
        .quota_write    = ext4_quota_write,
  #endif
        .bdev_try_to_free_page = bdev_try_to_free_page,
 -      .trim_fs        = ext4_trim_fs
  };
  
  static const struct super_operations ext4_nojournal_sops = {
@@@ -1255,8 -1239,8 +1254,8 @@@ enum 
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
        Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
 -      Opt_stripe, Opt_delalloc, Opt_nodelalloc,
 -      Opt_block_validity, Opt_noblock_validity,
 +      Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
 +      Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard,
@@@ -1320,8 -1304,6 +1319,8 @@@ static const match_table_t tokens = 
        {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
 +      {Opt_mblk_io_submit, "mblk_io_submit"},
 +      {Opt_nomblk_io_submit, "nomblk_io_submit"},
        {Opt_block_validity, "block_validity"},
        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@@ -1400,7 -1382,7 +1399,7 @@@ static int set_qf_name(struct super_blo
                sbi->s_qf_names[qtype] = NULL;
                return 0;
        }
 -      set_opt(sbi->s_mount_opt, QUOTA);
 +      set_opt(sb, QUOTA);
        return 1;
  }
  
@@@ -1455,21 -1437,21 +1454,21 @@@ static int parse_options(char *options
                switch (token) {
                case Opt_bsd_df:
                        ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 -                      clear_opt(sbi->s_mount_opt, MINIX_DF);
 +                      clear_opt(sb, MINIX_DF);
                        break;
                case Opt_minix_df:
                        ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 -                      set_opt(sbi->s_mount_opt, MINIX_DF);
 +                      set_opt(sb, MINIX_DF);
  
                        break;
                case Opt_grpid:
                        ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 -                      set_opt(sbi->s_mount_opt, GRPID);
 +                      set_opt(sb, GRPID);
  
                        break;
                case Opt_nogrpid:
                        ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 -                      clear_opt(sbi->s_mount_opt, GRPID);
 +                      clear_opt(sb, GRPID);
  
                        break;
                case Opt_resuid:
                        /* *sb_block = match_int(&args[0]); */
                        break;
                case Opt_err_panic:
 -                      clear_opt(sbi->s_mount_opt, ERRORS_CONT);
 -                      clear_opt(sbi->s_mount_opt, ERRORS_RO);
 -                      set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 +                      clear_opt(sb, ERRORS_CONT);
 +                      clear_opt(sb, ERRORS_RO);
 +                      set_opt(sb, ERRORS_PANIC);
                        break;
                case Opt_err_ro:
 -                      clear_opt(sbi->s_mount_opt, ERRORS_CONT);
 -                      clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
 -                      set_opt(sbi->s_mount_opt, ERRORS_RO);
 +                      clear_opt(sb, ERRORS_CONT);
 +                      clear_opt(sb, ERRORS_PANIC);
 +                      set_opt(sb, ERRORS_RO);
                        break;
                case Opt_err_cont:
 -                      clear_opt(sbi->s_mount_opt, ERRORS_RO);
 -                      clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
 -                      set_opt(sbi->s_mount_opt, ERRORS_CONT);
 +                      clear_opt(sb, ERRORS_RO);
 +                      clear_opt(sb, ERRORS_PANIC);
 +                      set_opt(sb, ERRORS_CONT);
                        break;
                case Opt_nouid32:
 -                      set_opt(sbi->s_mount_opt, NO_UID32);
 +                      set_opt(sb, NO_UID32);
                        break;
                case Opt_debug:
 -                      set_opt(sbi->s_mount_opt, DEBUG);
 +                      set_opt(sb, DEBUG);
                        break;
                case Opt_oldalloc:
 -                      set_opt(sbi->s_mount_opt, OLDALLOC);
 +                      set_opt(sb, OLDALLOC);
                        break;
                case Opt_orlov:
 -                      clear_opt(sbi->s_mount_opt, OLDALLOC);
 +                      clear_opt(sb, OLDALLOC);
                        break;
  #ifdef CONFIG_EXT4_FS_XATTR
                case Opt_user_xattr:
 -                      set_opt(sbi->s_mount_opt, XATTR_USER);
 +                      set_opt(sb, XATTR_USER);
                        break;
                case Opt_nouser_xattr:
 -                      clear_opt(sbi->s_mount_opt, XATTR_USER);
 +                      clear_opt(sb, XATTR_USER);
                        break;
  #else
                case Opt_user_xattr:
  #endif
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
                case Opt_acl:
 -                      set_opt(sbi->s_mount_opt, POSIX_ACL);
 +                      set_opt(sb, POSIX_ACL);
                        break;
                case Opt_noacl:
 -                      clear_opt(sbi->s_mount_opt, POSIX_ACL);
 +                      clear_opt(sb, POSIX_ACL);
                        break;
  #else
                case Opt_acl:
                                         "Cannot specify journal on remount");
                                return 0;
                        }
 -                      set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
 +                      set_opt(sb, UPDATE_JOURNAL);
                        break;
                case Opt_journal_dev:
                        if (is_remount) {
                        *journal_devnum = option;
                        break;
                case Opt_journal_checksum:
 -                      set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
 +                      set_opt(sb, JOURNAL_CHECKSUM);
                        break;
                case Opt_journal_async_commit:
 -                      set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
 -                      set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
 +                      set_opt(sb, JOURNAL_ASYNC_COMMIT);
 +                      set_opt(sb, JOURNAL_CHECKSUM);
                        break;
                case Opt_noload:
 -                      set_opt(sbi->s_mount_opt, NOLOAD);
 +                      set_opt(sb, NOLOAD);
                        break;
                case Opt_commit:
                        if (match_int(&args[0], &option))
                                        return 0;
                                }
                        } else {
 -                              clear_opt(sbi->s_mount_opt, DATA_FLAGS);
 +                              clear_opt(sb, DATA_FLAGS);
                                sbi->s_mount_opt |= data_opt;
                        }
                        break;
                case Opt_data_err_abort:
 -                      set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
 +                      set_opt(sb, DATA_ERR_ABORT);
                        break;
                case Opt_data_err_ignore:
 -                      clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
 +                      clear_opt(sb, DATA_ERR_ABORT);
                        break;
  #ifdef CONFIG_QUOTA
                case Opt_usrjquota:
@@@ -1661,12 -1643,12 +1660,12 @@@ set_qf_format
                        break;
                case Opt_quota:
                case Opt_usrquota:
 -                      set_opt(sbi->s_mount_opt, QUOTA);
 -                      set_opt(sbi->s_mount_opt, USRQUOTA);
 +                      set_opt(sb, QUOTA);
 +                      set_opt(sb, USRQUOTA);
                        break;
                case Opt_grpquota:
 -                      set_opt(sbi->s_mount_opt, QUOTA);
 -                      set_opt(sbi->s_mount_opt, GRPQUOTA);
 +                      set_opt(sb, QUOTA);
 +                      set_opt(sb, GRPQUOTA);
                        break;
                case Opt_noquota:
                        if (sb_any_quota_loaded(sb)) {
                                        "options when quota turned on");
                                return 0;
                        }
 -                      clear_opt(sbi->s_mount_opt, QUOTA);
 -                      clear_opt(sbi->s_mount_opt, USRQUOTA);
 -                      clear_opt(sbi->s_mount_opt, GRPQUOTA);
 +                      clear_opt(sb, QUOTA);
 +                      clear_opt(sb, USRQUOTA);
 +                      clear_opt(sb, GRPQUOTA);
                        break;
  #else
                case Opt_quota:
                        sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
                        break;
                case Opt_nobarrier:
 -                      clear_opt(sbi->s_mount_opt, BARRIER);
 +                      clear_opt(sb, BARRIER);
                        break;
                case Opt_barrier:
                        if (args[0].from) {
                        } else
                                option = 1;     /* No argument, default to 1 */
                        if (option)
 -                              set_opt(sbi->s_mount_opt, BARRIER);
 +                              set_opt(sb, BARRIER);
                        else
 -                              clear_opt(sbi->s_mount_opt, BARRIER);
 +                              clear_opt(sb, BARRIER);
                        break;
                case Opt_ignore:
                        break;
                                 "Ignoring deprecated bh option");
                        break;
                case Opt_i_version:
 -                      set_opt(sbi->s_mount_opt, I_VERSION);
 +                      set_opt(sb, I_VERSION);
                        sb->s_flags |= MS_I_VERSION;
                        break;
                case Opt_nodelalloc:
 -                      clear_opt(sbi->s_mount_opt, DELALLOC);
 +                      clear_opt(sb, DELALLOC);
 +                      break;
 +              case Opt_mblk_io_submit:
 +                      set_opt(sb, MBLK_IO_SUBMIT);
 +                      break;
 +              case Opt_nomblk_io_submit:
 +                      clear_opt(sb, MBLK_IO_SUBMIT);
                        break;
                case Opt_stripe:
                        if (match_int(&args[0], &option))
                        sbi->s_stripe = option;
                        break;
                case Opt_delalloc:
 -                      set_opt(sbi->s_mount_opt, DELALLOC);
 +                      set_opt(sb, DELALLOC);
                        break;
                case Opt_block_validity:
 -                      set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
 +                      set_opt(sb, BLOCK_VALIDITY);
                        break;
                case Opt_noblock_validity:
 -                      clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
 +                      clear_opt(sb, BLOCK_VALIDITY);
                        break;
                case Opt_inode_readahead_blks:
                        if (match_int(&args[0], &option))
                                                            option);
                        break;
                case Opt_noauto_da_alloc:
 -                      set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 +                      set_opt(sb,NO_AUTO_DA_ALLOC);
                        break;
                case Opt_auto_da_alloc:
                        if (args[0].from) {
                        } else
                                option = 1;     /* No argument, default to 1 */
                        if (option)
 -                              clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
 +                              clear_opt(sb, NO_AUTO_DA_ALLOC);
                        else
 -                              set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 +                              set_opt(sb,NO_AUTO_DA_ALLOC);
                        break;
                case Opt_discard:
 -                      set_opt(sbi->s_mount_opt, DISCARD);
 +                      set_opt(sb, DISCARD);
                        break;
                case Opt_nodiscard:
 -                      clear_opt(sbi->s_mount_opt, DISCARD);
 +                      clear_opt(sb, DISCARD);
                        break;
                case Opt_dioread_nolock:
 -                      set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
 +                      set_opt(sb, DIOREAD_NOLOCK);
                        break;
                case Opt_dioread_lock:
 -                      clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
 +                      clear_opt(sb, DIOREAD_NOLOCK);
                        break;
                case Opt_init_inode_table:
 -                      set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
 +                      set_opt(sb, INIT_INODE_TABLE);
                        if (args[0].from) {
                                if (match_int(&args[0], &option))
                                        return 0;
                        sbi->s_li_wait_mult = option;
                        break;
                case Opt_noinit_inode_table:
 -                      clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
 +                      clear_opt(sb, INIT_INODE_TABLE);
                        break;
                default:
                        ext4_msg(sb, KERN_ERR,
  #ifdef CONFIG_QUOTA
        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
                if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
 -                      clear_opt(sbi->s_mount_opt, USRQUOTA);
 +                      clear_opt(sb, USRQUOTA);
  
                if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
 -                      clear_opt(sbi->s_mount_opt, GRPQUOTA);
 +                      clear_opt(sb, GRPQUOTA);
  
                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
                        ext4_msg(sb, KERN_ERR, "old and new quota "
@@@ -1909,12 -1885,12 +1908,12 @@@ static int ext4_setup_super(struct supe
        ext4_commit_super(sb, 1);
        if (test_opt(sb, DEBUG))
                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
 -                              "bpg=%lu, ipg=%lu, mo=%04x]\n",
 +                              "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
                        sb->s_blocksize,
                        sbi->s_groups_count,
                        EXT4_BLOCKS_PER_GROUP(sb),
                        EXT4_INODES_PER_GROUP(sb),
 -                      sbi->s_mount_opt);
 +                      sbi->s_mount_opt, sbi->s_mount_opt2);
  
        return res;
  }
@@@ -1944,13 -1920,14 +1943,13 @@@ static int ext4_fill_flex_info(struct s
        size = flex_group_count * sizeof(struct flex_groups);
        sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
        if (sbi->s_flex_groups == NULL) {
 -              sbi->s_flex_groups = vmalloc(size);
 -              if (sbi->s_flex_groups)
 -                      memset(sbi->s_flex_groups, 0, size);
 -      }
 -      if (sbi->s_flex_groups == NULL) {
 -              ext4_msg(sb, KERN_ERR, "not enough memory for "
 -                              "%u flex groups", flex_group_count);
 -              goto failed;
 +              sbi->s_flex_groups = vzalloc(size);
 +              if (sbi->s_flex_groups == NULL) {
 +                      ext4_msg(sb, KERN_ERR,
 +                               "not enough memory for %u flex groups",
 +                               flex_group_count);
 +                      goto failed;
 +              }
        }
  
        for (i = 0; i < sbi->s_groups_count; i++) {
@@@ -2821,6 -2798,9 +2820,6 @@@ static void ext4_clear_request_list(voi
        struct ext4_li_request *elr;
  
        mutex_lock(&ext4_li_info->li_list_mtx);
 -      if (list_empty(&ext4_li_info->li_request_list))
 -              return;
 -
        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
                elr = list_entry(pos, struct ext4_li_request,
                                 lr_request);
@@@ -2929,7 -2909,7 +2928,7 @@@ static int ext4_register_li_request(str
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_li_request *elr;
        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
 -      int ret;
 +      int ret = 0;
  
        if (sbi->s_li_request != NULL)
                return 0;
@@@ -3084,41 -3064,41 +3083,41 @@@ static int ext4_fill_super(struct super
  
        /* Set defaults before we parse the mount options */
        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 -      set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
 +      set_opt(sb, INIT_INODE_TABLE);
        if (def_mount_opts & EXT4_DEFM_DEBUG)
 -              set_opt(sbi->s_mount_opt, DEBUG);
 +              set_opt(sb, DEBUG);
        if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
                ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
                        "2.6.38");
 -              set_opt(sbi->s_mount_opt, GRPID);
 +              set_opt(sb, GRPID);
        }
        if (def_mount_opts & EXT4_DEFM_UID16)
 -              set_opt(sbi->s_mount_opt, NO_UID32);
 +              set_opt(sb, NO_UID32);
  #ifdef CONFIG_EXT4_FS_XATTR
        if (def_mount_opts & EXT4_DEFM_XATTR_USER)
 -              set_opt(sbi->s_mount_opt, XATTR_USER);
 +              set_opt(sb, XATTR_USER);
  #endif
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
        if (def_mount_opts & EXT4_DEFM_ACL)
 -              set_opt(sbi->s_mount_opt, POSIX_ACL);
 +              set_opt(sb, POSIX_ACL);
  #endif
        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
 -              set_opt(sbi->s_mount_opt, JOURNAL_DATA);
 +              set_opt(sb, JOURNAL_DATA);
        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
 -              set_opt(sbi->s_mount_opt, ORDERED_DATA);
 +              set_opt(sb, ORDERED_DATA);
        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
 -              set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
 +              set_opt(sb, WRITEBACK_DATA);
  
        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
 -              set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 +              set_opt(sb, ERRORS_PANIC);
        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
 -              set_opt(sbi->s_mount_opt, ERRORS_CONT);
 +              set_opt(sb, ERRORS_CONT);
        else
 -              set_opt(sbi->s_mount_opt, ERRORS_RO);
 +              set_opt(sb, ERRORS_RO);
        if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
 -              set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
 +              set_opt(sb, BLOCK_VALIDITY);
        if (def_mount_opts & EXT4_DEFM_DISCARD)
 -              set_opt(sbi->s_mount_opt, DISCARD);
 +              set_opt(sb, DISCARD);
  
        sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
        sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
  
        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
 -              set_opt(sbi->s_mount_opt, BARRIER);
 +              set_opt(sb, BARRIER);
  
        /*
         * enable delayed allocation by default
         */
        if (!IS_EXT3_SB(sb) &&
            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
 -              set_opt(sbi->s_mount_opt, DELALLOC);
 +              set_opt(sb, DELALLOC);
  
        if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
                           &journal_devnum, &journal_ioprio, NULL, 0)) {
         * Test whether we have more sectors than will fit in sector_t,
         * and whether the max offset is addressable by the page cache.
         */
 -      ret = generic_check_addressable(sb->s_blocksize_bits,
 +      err = generic_check_addressable(sb->s_blocksize_bits,
                                        ext4_blocks_count(es));
 -      if (ret) {
 +      if (err) {
                ext4_msg(sb, KERN_ERR, "filesystem"
                         " too large to mount safely on this system");
                if (sizeof(sector_t) < 8)
                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
 +              ret = err;
                goto failed_mount;
        }
  
                       "suppressed and not mounted read-only");
                goto failed_mount_wq;
        } else {
 -              clear_opt(sbi->s_mount_opt, DATA_FLAGS);
 -              set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
 +              clear_opt(sb, DATA_FLAGS);
 +              set_opt(sb, WRITEBACK_DATA);
                sbi->s_journal = NULL;
                needs_recovery = 0;
                goto no_journal;
                 */
                if (jbd2_journal_check_available_features
                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
 -                      set_opt(sbi->s_mount_opt, ORDERED_DATA);
 +                      set_opt(sb, ORDERED_DATA);
                else
 -                      set_opt(sbi->s_mount_opt, JOURNAL_DATA);
 +                      set_opt(sb, JOURNAL_DATA);
                break;
  
        case EXT4_MOUNT_ORDERED_DATA:
@@@ -3569,18 -3548,18 +3568,18 @@@ no_journal
            (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
                ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
                         "requested data journaling mode");
 -              clear_opt(sbi->s_mount_opt, DELALLOC);
 +              clear_opt(sb, DELALLOC);
        }
        if (test_opt(sb, DIOREAD_NOLOCK)) {
                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
                        ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
                                "option - requested data journaling mode");
 -                      clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
 +                      clear_opt(sb, DIOREAD_NOLOCK);
                }
                if (sb->s_blocksize < PAGE_SIZE) {
                        ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
                                "option - block size is too small");
 -                      clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
 +                      clear_opt(sb, DIOREAD_NOLOCK);
                }
        }
  
@@@ -3778,13 -3757,6 +3777,6 @@@ static journal_t *ext4_get_dev_journal(
        if (bdev == NULL)
                return NULL;
  
-       if (bd_claim(bdev, sb)) {
-               ext4_msg(sb, KERN_ERR,
-                       "failed to claim external journal device");
-               blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
-               return NULL;
-       }
        blocksize = sb->s_blocksize;
        hblock = bdev_logical_block_size(bdev);
        if (blocksize < hblock) {
@@@ -4179,22 -4151,6 +4171,22 @@@ static int ext4_unfreeze(struct super_b
        return 0;
  }
  
 +/*
 + * Structure to save mount options for ext4_remount's benefit
 + */
 +struct ext4_mount_options {
 +      unsigned long s_mount_opt;
 +      unsigned long s_mount_opt2;
 +      uid_t s_resuid;
 +      gid_t s_resgid;
 +      unsigned long s_commit_interval;
 +      u32 s_min_batch_time, s_max_batch_time;
 +#ifdef CONFIG_QUOTA
 +      int s_jquota_fmt;
 +      char *s_qf_names[MAXQUOTAS];
 +#endif
 +};
 +
  static int ext4_remount(struct super_block *sb, int *flags, char *data)
  {
        struct ext4_super_block *es;
        lock_super(sb);
        old_sb_flags = sb->s_flags;
        old_opts.s_mount_opt = sbi->s_mount_opt;
 +      old_opts.s_mount_opt2 = sbi->s_mount_opt2;
        old_opts.s_resuid = sbi->s_resuid;
        old_opts.s_resgid = sbi->s_resgid;
        old_opts.s_commit_interval = sbi->s_commit_interval;
  restore_opts:
        sb->s_flags = old_sb_flags;
        sbi->s_mount_opt = old_opts.s_mount_opt;
 +      sbi->s_mount_opt2 = old_opts.s_mount_opt2;
        sbi->s_resuid = old_opts.s_resuid;
        sbi->s_resgid = old_opts.s_resgid;
        sbi->s_commit_interval = old_opts.s_commit_interval;
diff --combined fs/gfs2/ops_fstype.c
@@@ -440,6 -440,7 +440,6 @@@ static int gfs2_lookup_root(struct supe
                iput(inode);
                return -ENOMEM;
        }
 -      dentry->d_op = &gfs2_dops;
        *dptr = dentry;
        return 0;
  }
@@@ -1105,7 -1106,6 +1105,7 @@@ static int fill_super(struct super_bloc
  
        sb->s_magic = GFS2_MAGIC;
        sb->s_op = &gfs2_super_ops;
 +      sb->s_d_op = &gfs2_dops;
        sb->s_export_op = &gfs2_export_ops;
        sb->s_xattr = gfs2_xattr_handlers;
        sb->s_qcop = &gfs2_quotactl_ops;
@@@ -1268,7 -1268,7 +1268,7 @@@ static struct dentry *gfs2_mount(struc
  {
        struct block_device *bdev;
        struct super_block *s;
-       fmode_t mode = FMODE_READ;
+       fmode_t mode = FMODE_READ | FMODE_EXCL;
        int error;
        struct gfs2_args args;
        struct gfs2_sbd *sdp;
        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;
  
-       bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+       bdev = blkdev_get_by_path(dev_name, mode, fs_type);
        if (IS_ERR(bdev))
                return ERR_CAST(bdev);
  
                goto error_bdev;
  
        if (s->s_root)
-               close_bdev_exclusive(bdev, mode);
+               blkdev_put(bdev, mode);
  
        memset(&args, 0, sizeof(args));
        args.ar_quota = GFS2_QUOTA_DEFAULT;
@@@ -1342,7 -1342,7 +1342,7 @@@ error_super
        deactivate_locked_super(s);
        return ERR_PTR(error);
  error_bdev:
-       close_bdev_exclusive(bdev, mode);
+       blkdev_put(bdev, mode);
        return ERR_PTR(error);
  }
  
diff --combined fs/nfsd/vfs.c
@@@ -845,11 -845,6 +845,6 @@@ nfsd_splice_actor(struct pipe_inode_inf
        struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
        struct page *page = buf->page;
        size_t size;
-       int ret;
-       ret = buf->ops->confirm(pipe, buf);
-       if (unlikely(ret))
-               return ret;
  
        size = sd->len;
  
@@@ -1756,7 -1751,8 +1751,7 @@@ nfsd_rename(struct svc_rqst *rqstp, str
                goto out_dput_new;
  
        if (svc_msnfs(ffhp) &&
 -              ((atomic_read(&odentry->d_count) > 1)
 -               || (atomic_read(&ndentry->d_count) > 1))) {
 +              ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
                        host_err = -EPERM;
                        goto out_dput_new;
        }
@@@ -1842,7 -1838,7 +1837,7 @@@ nfsd_unlink(struct svc_rqst *rqstp, str
        if (type != S_IFDIR) { /* It's UNLINK */
  #ifdef MSNFS
                if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
 -                      (atomic_read(&rdentry->d_count) > 1)) {
 +                      (rdentry->d_count > 1)) {
                        host_err = -EPERM;
                } else
  #endif
diff --combined fs/nilfs2/super.c
@@@ -47,6 -47,7 +47,6 @@@
  #include <linux/crc32.h>
  #include <linux/vfs.h>
  #include <linux/writeback.h>
 -#include <linux/kobject.h>
  #include <linux/seq_file.h>
  #include <linux/mount.h>
  #include "nilfs.h"
@@@ -110,17 -111,12 +110,17 @@@ void nilfs_error(struct super_block *sb
                 const char *fmt, ...)
  {
        struct nilfs_sb_info *sbi = NILFS_SB(sb);
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function);
 -      vprintk(fmt, args);
 -      printk("\n");
 +
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +
 +      printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
 +             sb->s_id, function, &vaf);
 +
        va_end(args);
  
        if (!(sb->s_flags & MS_RDONLY)) {
  void nilfs_warning(struct super_block *sb, const char *function,
                   const char *fmt, ...)
  {
 +      struct va_format vaf;
        va_list args;
  
        va_start(args, fmt);
 -      printk(KERN_WARNING "NILFS warning (device %s): %s: ",
 -             sb->s_id, function);
 -      vprintk(fmt, args);
 -      printk("\n");
 +
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +
 +      printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
 +             sb->s_id, function, &vaf);
 +
        va_end(args);
  }
  
@@@ -170,13 -162,10 +170,13 @@@ struct inode *nilfs_alloc_inode(struct 
        return &ii->vfs_inode;
  }
  
 -void nilfs_destroy_inode(struct inode *inode)
 +static void nilfs_i_callback(struct rcu_head *head)
  {
 +      struct inode *inode = container_of(head, struct inode, i_rcu);
        struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
  
 +      INIT_LIST_HEAD(&inode->i_dentry);
 +
        if (mdi) {
                kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
                kfree(mdi);
        kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
  }
  
 +void nilfs_destroy_inode(struct inode *inode)
 +{
 +      call_rcu(&inode->i_rcu, nilfs_i_callback);
 +}
 +
  static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
  {
        struct the_nilfs *nilfs = sbi->s_nilfs;
@@@ -854,7 -838,7 +854,7 @@@ static int nilfs_attach_snapshot(struc
  
  static int nilfs_tree_was_touched(struct dentry *root_dentry)
  {
 -      return atomic_read(&root_dentry->d_count) > 1;
 +      return root_dentry->d_count > 1;
  }
  
  /**
@@@ -1018,11 -1002,11 +1018,11 @@@ static int nilfs_remount(struct super_b
        struct nilfs_sb_info *sbi = NILFS_SB(sb);
        struct the_nilfs *nilfs = sbi->s_nilfs;
        unsigned long old_sb_flags;
 -      struct nilfs_mount_options old_opts;
 +      unsigned long old_mount_opt;
        int err;
  
        old_sb_flags = sb->s_flags;
 -      old_opts.mount_opt = sbi->s_mount_opt;
 +      old_mount_opt = sbi->s_mount_opt;
  
        if (!parse_options(data, sb, 1)) {
                err = -EINVAL;
  
   restore_opts:
        sb->s_flags = old_sb_flags;
 -      sbi->s_mount_opt = old_opts.mount_opt;
 +      sbi->s_mount_opt = old_mount_opt;
        return err;
  }
  
@@@ -1163,14 -1147,14 +1163,14 @@@ nilfs_mount(struct file_system_type *fs
  {
        struct nilfs_super_data sd;
        struct super_block *s;
-       fmode_t mode = FMODE_READ;
+       fmode_t mode = FMODE_READ | FMODE_EXCL;
        struct dentry *root_dentry;
        int err, s_new = false;
  
        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;
  
-       sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+       sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
        if (IS_ERR(sd.bdev))
                return ERR_CAST(sd.bdev);
  
        }
  
        if (!s_new)
-               close_bdev_exclusive(sd.bdev, mode);
+               blkdev_put(sd.bdev, mode);
  
        return root_dentry;
  
  
   failed:
        if (!s_new)
-               close_bdev_exclusive(sd.bdev, mode);
+               blkdev_put(sd.bdev, mode);
        return ERR_PTR(err);
  }
  
@@@ -82,7 -82,6 +82,7 @@@ static unsigned long o2hb_failed_region
  #define O2HB_DB_TYPE_REGION_LIVENODES 4
  #define O2HB_DB_TYPE_REGION_NUMBER    5
  #define O2HB_DB_TYPE_REGION_ELAPSED_TIME      6
 +#define O2HB_DB_TYPE_REGION_PINNED    7
  struct o2hb_debug_buf {
        int db_type;
        int db_size;
@@@ -102,7 -101,6 +102,7 @@@ static struct o2hb_debug_buf *o2hb_db_f
  #define O2HB_DEBUG_FAILEDREGIONS      "failed_regions"
  #define O2HB_DEBUG_REGION_NUMBER      "num"
  #define O2HB_DEBUG_REGION_ELAPSED_TIME        "elapsed_time_in_ms"
 +#define O2HB_DEBUG_REGION_PINNED      "pinned"
  
  static struct dentry *o2hb_debug_dir;
  static struct dentry *o2hb_debug_livenodes;
@@@ -134,33 -132,6 +134,33 @@@ char *o2hb_heartbeat_mode_desc[O2HB_HEA
  unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
  unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
  
 +/*
 + * o2hb_dependent_users tracks the number of registered callbacks that depend
 + * on heartbeat. o2net and o2dlm are two entities that register this callback.
 + * However only o2dlm depends on the heartbeat. It does not want the heartbeat
 + * to stop while a dlm domain is still active.
 + */
 +unsigned int o2hb_dependent_users;
 +
 +/*
 + * In global heartbeat mode, all regions are pinned if there are one or more
 + * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
 + * regions are unpinned if the region count exceeds the cut off or the number
 + * of dependent users falls to zero.
 + */
 +#define O2HB_PIN_CUT_OFF              3
 +
 +/*
 + * In local heartbeat mode, we assume the dlm domain name to be the same as
 + * region uuid. This is true for domains created for the file system but not
 + * necessarily true for userdlm domains. This is a known limitation.
 + *
 + * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
 + * works for both file system and userdlm domains.
 + */
 +static int o2hb_region_pin(const char *region_uuid);
 +static void o2hb_region_unpin(const char *region_uuid);
 +
  /* Only sets a new threshold if there are no active regions.
   *
   * No locking or otherwise interesting code is required for reading
@@@ -215,9 -186,7 +215,9 @@@ struct o2hb_region 
        struct config_item      hr_item;
  
        struct list_head        hr_all_item;
 -      unsigned                hr_unclean_stop:1;
 +      unsigned                hr_unclean_stop:1,
 +                              hr_item_pinned:1,
 +                              hr_item_dropped:1;
  
        /* protected by the hr_callback_sem */
        struct task_struct      *hr_task;
        struct dentry           *hr_debug_livenodes;
        struct dentry           *hr_debug_regnum;
        struct dentry           *hr_debug_elapsed_time;
 +      struct dentry           *hr_debug_pinned;
        struct o2hb_debug_buf   *hr_db_livenodes;
        struct o2hb_debug_buf   *hr_db_regnum;
        struct o2hb_debug_buf   *hr_db_elapsed_time;
 +      struct o2hb_debug_buf   *hr_db_pinned;
  
        /* let the person setting up hb wait for it to return until it
         * has reached a 'steady' state.  This will be fixed when we have
@@@ -340,7 -307,8 +340,7 @@@ static void o2hb_arm_write_timeout(stru
  
  static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
  {
 -      cancel_delayed_work(&reg->hr_write_timeout_work);
 -      flush_scheduled_work();
 +      cancel_delayed_work_sync(&reg->hr_write_timeout_work);
  }
  
  static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
@@@ -734,14 -702,6 +734,14 @@@ static void o2hb_set_quorum_device(stru
               config_item_name(&reg->hr_item));
  
        set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
 +
 +      /*
 +       * If global heartbeat active, unpin all regions if the
 +       * region count > CUT_OFF
 +       */
 +      if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
 +                         O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
 +              o2hb_region_unpin(NULL);
  }
  
  static int o2hb_check_slot(struct o2hb_region *reg,
@@@ -1082,9 -1042,6 +1082,9 @@@ static int o2hb_thread(void *data
  
        set_user_nice(current, -20);
  
 +      /* Pin node */
 +      o2nm_depend_this_node();
 +
        while (!kthread_should_stop() && !reg->hr_unclean_stop) {
                /* We track the time spent inside
                 * o2hb_do_disk_heartbeat so that we avoid more than
                mlog_errno(ret);
        }
  
 +      /* Unpin node */
 +      o2nm_undepend_this_node();
 +
        mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
  
        return 0;
@@@ -1189,12 -1143,6 +1189,12 @@@ static int o2hb_debug_open(struct inod
                                                 reg->hr_last_timeout_start));
                goto done;
  
 +      case O2HB_DB_TYPE_REGION_PINNED:
 +              reg = (struct o2hb_region *)db->db_data;
 +              out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
 +                              !!reg->hr_item_pinned);
 +              goto done;
 +
        default:
                goto done;
        }
@@@ -1368,8 -1316,6 +1368,8 @@@ int o2hb_init(void
        memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
        memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
  
 +      o2hb_dependent_users = 0;
 +
        return o2hb_debug_init();
  }
  
@@@ -1439,7 -1385,6 +1439,7 @@@ static void o2hb_region_release(struct 
        debugfs_remove(reg->hr_debug_livenodes);
        debugfs_remove(reg->hr_debug_regnum);
        debugfs_remove(reg->hr_debug_elapsed_time);
 +      debugfs_remove(reg->hr_debug_pinned);
        debugfs_remove(reg->hr_debug_dir);
  
        spin_lock(&o2hb_live_lock);
@@@ -1729,7 -1674,7 +1729,7 @@@ static ssize_t o2hb_region_dev_write(st
                goto out;
  
        reg->hr_bdev = I_BDEV(filp->f_mapping->host);
-       ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ);
+       ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
        if (ret) {
                reg->hr_bdev = NULL;
                goto out;
@@@ -2004,18 -1949,6 +2004,18 @@@ static int o2hb_debug_region_init(struc
                goto bail;
        }
  
 +      reg->hr_debug_pinned =
 +                      o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
 +                                        reg->hr_debug_dir,
 +                                        &(reg->hr_db_pinned),
 +                                        sizeof(*(reg->hr_db_pinned)),
 +                                        O2HB_DB_TYPE_REGION_PINNED,
 +                                        0, 0, reg);
 +      if (!reg->hr_debug_pinned) {
 +              mlog_errno(ret);
 +              goto bail;
 +      }
 +
        ret = 0;
  bail:
        return ret;
@@@ -2031,10 -1964,8 +2031,10 @@@ static struct config_item *o2hb_heartbe
        if (reg == NULL)
                return ERR_PTR(-ENOMEM);
  
 -      if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
 -              return ERR_PTR(-ENAMETOOLONG);
 +      if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
 +              ret = -ENAMETOOLONG;
 +              goto free;
 +      }
  
        spin_lock(&o2hb_live_lock);
        reg->hr_region_num = 0;
                                                         O2NM_MAX_REGIONS);
                if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
                        spin_unlock(&o2hb_live_lock);
 -                      return ERR_PTR(-EFBIG);
 +                      ret = -EFBIG;
 +                      goto free;
                }
                set_bit(reg->hr_region_num, o2hb_region_bitmap);
        }
        ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
        if (ret) {
                config_item_put(&reg->hr_item);
 -              return ERR_PTR(ret);
 +              goto free;
        }
  
        return &reg->hr_item;
 +free:
 +      kfree(reg);
 +      return ERR_PTR(ret);
  }
  
  static void o2hb_heartbeat_group_drop_item(struct config_group *group,
  {
        struct task_struct *hb_task;
        struct o2hb_region *reg = to_o2hb_region(item);
 +      int quorum_region = 0;
  
        /* stop the thread when the user removes the region dir */
        spin_lock(&o2hb_live_lock);
        if (o2hb_global_heartbeat_active()) {
                clear_bit(reg->hr_region_num, o2hb_region_bitmap);
                clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
 +              if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
 +                      quorum_region = 1;
 +              clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
        }
        hb_task = reg->hr_task;
        reg->hr_task = NULL;
 +      reg->hr_item_dropped = 1;
        spin_unlock(&o2hb_live_lock);
  
        if (hb_task)
        if (o2hb_global_heartbeat_active())
                printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
                       config_item_name(&reg->hr_item));
 +
        config_item_put(item);
 +
 +      if (!o2hb_global_heartbeat_active() || !quorum_region)
 +              return;
 +
 +      /*
 +       * If global heartbeat active and there are dependent users,
 +       * pin all regions if quorum region count <= CUT_OFF
 +       */
 +      spin_lock(&o2hb_live_lock);
 +
 +      if (!o2hb_dependent_users)
 +              goto unlock;
 +
 +      if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
 +                         O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
 +              o2hb_region_pin(NULL);
 +
 +unlock:
 +      spin_unlock(&o2hb_live_lock);
  }
  
  struct o2hb_heartbeat_group_attribute {
@@@ -2307,138 -2209,63 +2307,138 @@@ void o2hb_setup_callback(struct o2hb_ca
  }
  EXPORT_SYMBOL_GPL(o2hb_setup_callback);
  
 -static struct o2hb_region *o2hb_find_region(const char *region_uuid)
 +/*
 + * In local heartbeat mode, region_uuid passed matches the dlm domain name.
 + * In global heartbeat mode, region_uuid passed is NULL.
 + *
 + * In local, we only pin the matching region. In global we pin all the active
 + * regions.
 + */
 +static int o2hb_region_pin(const char *region_uuid)
  {
 -      struct o2hb_region *p, *reg = NULL;
 +      int ret = 0, found = 0;
 +      struct o2hb_region *reg;
 +      char *uuid;
  
        assert_spin_locked(&o2hb_live_lock);
  
 -      list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
 -              if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
 -                      reg = p;
 -                      break;
 +      list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
 +              uuid = config_item_name(&reg->hr_item);
 +
 +              /* local heartbeat */
 +              if (region_uuid) {
 +                      if (strcmp(region_uuid, uuid))
 +                              continue;
 +                      found = 1;
                }
 +
 +              if (reg->hr_item_pinned || reg->hr_item_dropped)
 +                      goto skip_pin;
 +
 +              /* Ignore ENOENT only for local hb (userdlm domain) */
 +              ret = o2nm_depend_item(&reg->hr_item);
 +              if (!ret) {
 +                      mlog(ML_CLUSTER, "Pin region %s\n", uuid);
 +                      reg->hr_item_pinned = 1;
 +              } else {
 +                      if (ret == -ENOENT && found)
 +                              ret = 0;
 +                      else {
 +                              mlog(ML_ERROR, "Pin region %s fails with %d\n",
 +                                   uuid, ret);
 +                              break;
 +                      }
 +              }
 +skip_pin:
 +              if (found)
 +                      break;
        }
  
 -      return reg;
 +      return ret;
  }
  
 -static int o2hb_region_get(const char *region_uuid)
 +/*
 + * In local heartbeat mode, region_uuid passed matches the dlm domain name.
 + * In global heartbeat mode, region_uuid passed is NULL.
 + *
 + * In local, we only unpin the matching region. In global we unpin all the
 + * active regions.
 + */
 +static void o2hb_region_unpin(const char *region_uuid)
  {
 -      int ret = 0;
        struct o2hb_region *reg;
 +      char *uuid;
 +      int found = 0;
  
 -      spin_lock(&o2hb_live_lock);
 +      assert_spin_locked(&o2hb_live_lock);
  
 -      reg = o2hb_find_region(region_uuid);
 -      if (!reg)
 -              ret = -ENOENT;
 -      spin_unlock(&o2hb_live_lock);
 +      list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
 +              uuid = config_item_name(&reg->hr_item);
 +              if (region_uuid) {
 +                      if (strcmp(region_uuid, uuid))
 +                              continue;
 +                      found = 1;
 +              }
  
 -      if (ret)
 -              goto out;
 +              if (reg->hr_item_pinned) {
 +                      mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
 +                      o2nm_undepend_item(&reg->hr_item);
 +                      reg->hr_item_pinned = 0;
 +              }
 +              if (found)
 +                      break;
 +      }
 +}
  
 -      ret = o2nm_depend_this_node();
 -      if (ret)
 -              goto out;
 +static int o2hb_region_inc_user(const char *region_uuid)
 +{
 +      int ret = 0;
  
 -      ret = o2nm_depend_item(&reg->hr_item);
 -      if (ret)
 -              o2nm_undepend_this_node();
 +      spin_lock(&o2hb_live_lock);
  
 -out:
 +      /* local heartbeat */
 +      if (!o2hb_global_heartbeat_active()) {
 +          ret = o2hb_region_pin(region_uuid);
 +          goto unlock;
 +      }
 +
 +      /*
 +       * if global heartbeat active and this is the first dependent user,
 +       * pin all regions if quorum region count <= CUT_OFF
 +       */
 +      o2hb_dependent_users++;
 +      if (o2hb_dependent_users > 1)
 +              goto unlock;
 +
 +      if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
 +                         O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
 +              ret = o2hb_region_pin(NULL);
 +
 +unlock:
 +      spin_unlock(&o2hb_live_lock);
        return ret;
  }
  
 -static void o2hb_region_put(const char *region_uuid)
 +void o2hb_region_dec_user(const char *region_uuid)
  {
 -      struct o2hb_region *reg;
 -
        spin_lock(&o2hb_live_lock);
  
 -      reg = o2hb_find_region(region_uuid);
 +      /* local heartbeat */
 +      if (!o2hb_global_heartbeat_active()) {
 +          o2hb_region_unpin(region_uuid);
 +          goto unlock;
 +      }
  
 -      spin_unlock(&o2hb_live_lock);
 +      /*
 +       * if global heartbeat active and there are no dependent users,
 +       * unpin all quorum regions
 +       */
 +      o2hb_dependent_users--;
 +      if (!o2hb_dependent_users)
 +              o2hb_region_unpin(NULL);
  
 -      if (reg) {
 -              o2nm_undepend_item(&reg->hr_item);
 -              o2nm_undepend_this_node();
 -      }
 +unlock:
 +      spin_unlock(&o2hb_live_lock);
  }
  
  int o2hb_register_callback(const char *region_uuid,
        }
  
        if (region_uuid) {
 -              ret = o2hb_region_get(region_uuid);
 -              if (ret)
 +              ret = o2hb_region_inc_user(region_uuid);
 +              if (ret) {
 +                      mlog_errno(ret);
                        goto out;
 +              }
        }
  
        down_write(&o2hb_callback_sem);
        up_write(&o2hb_callback_sem);
        ret = 0;
  out:
 -      mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n",
 +      mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
             ret, __builtin_return_address(0), hc);
        return ret;
  }
@@@ -2492,7 -2317,7 +2492,7 @@@ void o2hb_unregister_callback(const cha
  {
        BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
  
 -      mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
 +      mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
             __builtin_return_address(0), hc);
  
        /* XXX Can this happen _with_ a region reference? */
                return;
  
        if (region_uuid)
 -              o2hb_region_put(region_uuid);
 +              o2hb_region_dec_user(region_uuid);
  
        down_write(&o2hb_callback_sem);
  
diff --combined fs/reiserfs/journal.c
@@@ -43,6 -43,7 +43,6 @@@
  #include <linux/fcntl.h>
  #include <linux/stat.h>
  #include <linux/string.h>
 -#include <linux/smp_lock.h>
  #include <linux/buffer_head.h>
  #include <linux/workqueue.h>
  #include <linux/writeback.h>
@@@ -2551,8 -2552,6 +2551,6 @@@ static int release_journal_dev(struct s
        result = 0;
  
        if (journal->j_dev_bd != NULL) {
-               if (journal->j_dev_bd->bd_dev != super->s_dev)
-                       bd_release(journal->j_dev_bd);
                result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
                journal->j_dev_bd = NULL;
        }
@@@ -2570,7 -2569,7 +2568,7 @@@ static int journal_init_dev(struct supe
  {
        int result;
        dev_t jdev;
-       fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
+       fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
        char b[BDEVNAME_SIZE];
  
        result = 0;
  
        /* there is no "jdev" option and journal is on separate device */
        if ((!jdev_name || !jdev_name[0])) {
-               journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
+               if (jdev == super->s_dev)
+                       blkdev_mode &= ~FMODE_EXCL;
+               journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
+                                                     journal);
                journal->j_dev_mode = blkdev_mode;
                if (IS_ERR(journal->j_dev_bd)) {
                        result = PTR_ERR(journal->j_dev_bd);
                                         "cannot init journal device '%s': %i",
                                         __bdevname(jdev, b), result);
                        return result;
-               } else if (jdev != super->s_dev) {
-                       result = bd_claim(journal->j_dev_bd, journal);
-                       if (result) {
-                               blkdev_put(journal->j_dev_bd, blkdev_mode);
-                               return result;
-                       }
+               } else if (jdev != super->s_dev)
                        set_blocksize(journal->j_dev_bd, super->s_blocksize);
-               }
  
                return 0;
        }
  
        journal->j_dev_mode = blkdev_mode;
-       journal->j_dev_bd = open_bdev_exclusive(jdev_name,
-                                               blkdev_mode, journal);
+       journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
        if (IS_ERR(journal->j_dev_bd)) {
                result = PTR_ERR(journal->j_dev_bd);
                journal->j_dev_bd = NULL;
diff --combined fs/splice.c
@@@ -682,19 -682,14 +682,14 @@@ static int pipe_to_sendpage(struct pipe
  {
        struct file *file = sd->u.file;
        loff_t pos = sd->pos;
-       int ret, more;
-       ret = buf->ops->confirm(pipe, buf);
-       if (!ret) {
-               more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
-               if (file->f_op && file->f_op->sendpage)
-                       ret = file->f_op->sendpage(file, buf->page, buf->offset,
-                                                  sd->len, &pos, more);
-               else
-                       ret = -EINVAL;
-       }
+       int more;
  
-       return ret;
+       if (!likely(file->f_op && file->f_op->sendpage))
+               return -EINVAL;
+       more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+       return file->f_op->sendpage(file, buf->page, buf->offset,
+                                   sd->len, &pos, more);
  }
  
  /*
@@@ -727,13 -722,6 +722,6 @@@ int pipe_to_file(struct pipe_inode_inf
        void *fsdata;
        int ret;
  
-       /*
-        * make sure the data in this buffer is uptodate
-        */
-       ret = buf->ops->confirm(pipe, buf);
-       if (unlikely(ret))
-               return ret;
        offset = sd->pos & ~PAGE_CACHE_MASK;
  
        this_len = sd->len;
@@@ -805,12 -793,17 +793,17 @@@ int splice_from_pipe_feed(struct pipe_i
                if (sd->len > sd->total_len)
                        sd->len = sd->total_len;
  
-               ret = actor(pipe, buf, sd);
-               if (ret <= 0) {
+               ret = buf->ops->confirm(pipe, buf);
+               if (unlikely(ret)) {
                        if (ret == -ENODATA)
                                ret = 0;
                        return ret;
                }
+               ret = actor(pipe, buf, sd);
+               if (ret <= 0)
+                       return ret;
                buf->offset += ret;
                buf->len -= ret;
  
@@@ -1044,10 -1037,6 +1037,6 @@@ static int write_pipe_buf(struct pipe_i
        int ret;
        void *data;
  
-       ret = buf->ops->confirm(pipe, buf);
-       if (ret)
-               return ret;
        data = buf->ops->map(pipe, buf, 0);
        ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
        buf->ops->unmap(pipe, buf, data);
@@@ -1311,6 -1300,18 +1300,6 @@@ long do_splice_direct(struct file *in, 
  static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
                               struct pipe_inode_info *opipe,
                               size_t len, unsigned int flags);
 -/*
 - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
 - * location, so checking ->i_pipe is not enough to verify that this is a
 - * pipe.
 - */
 -static inline struct pipe_inode_info *pipe_info(struct inode *inode)
 -{
 -      if (S_ISFIFO(inode->i_mode))
 -              return inode->i_pipe;
 -
 -      return NULL;
 -}
  
  /*
   * Determine where to splice to/from.
@@@ -1324,8 -1325,8 +1313,8 @@@ static long do_splice(struct file *in, 
        loff_t offset, *off;
        long ret;
  
 -      ipipe = pipe_info(in->f_path.dentry->d_inode);
 -      opipe = pipe_info(out->f_path.dentry->d_inode);
 +      ipipe = get_pipe_info(in);
 +      opipe = get_pipe_info(out);
  
        if (ipipe && opipe) {
                if (off_in || off_out)
@@@ -1495,10 -1496,6 +1484,6 @@@ static int pipe_to_user(struct pipe_ino
        char *src;
        int ret;
  
-       ret = buf->ops->confirm(pipe, buf);
-       if (unlikely(ret))
-               return ret;
        /*
         * See if we can use the atomic maps, by prefaulting in the
         * pages and doing an atomic copy
@@@ -1543,7 -1540,7 +1528,7 @@@ static long vmsplice_to_user(struct fil
        int error;
        long ret;
  
 -      pipe = pipe_info(file->f_path.dentry->d_inode);
 +      pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
  
@@@ -1630,7 -1627,7 +1615,7 @@@ static long vmsplice_to_pipe(struct fil
        };
        long ret;
  
 -      pipe = pipe_info(file->f_path.dentry->d_inode);
 +      pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
  
@@@ -2010,8 -2007,8 +1995,8 @@@ static int link_pipe(struct pipe_inode_
  static long do_tee(struct file *in, struct file *out, size_t len,
                   unsigned int flags)
  {
 -      struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
 -      struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
 +      struct pipe_inode_info *ipipe = get_pipe_info(in);
 +      struct pipe_inode_info *opipe = get_pipe_info(out);
        int ret = -EINVAL;
  
        /*
diff --combined fs/super.c
@@@ -30,7 -30,6 +30,7 @@@
  #include <linux/idr.h>
  #include <linux/mutex.h>
  #include <linux/backing-dev.h>
 +#include <linux/rculist_bl.h>
  #include "internal.h"
  
  
@@@ -72,7 -71,7 +72,7 @@@ static struct super_block *alloc_super(
                INIT_LIST_HEAD(&s->s_files);
  #endif
                INIT_LIST_HEAD(&s->s_instances);
 -              INIT_HLIST_HEAD(&s->s_anon);
 +              INIT_HLIST_BL_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
                INIT_LIST_HEAD(&s->s_dentry_lru);
                init_rwsem(&s->s_umount);
@@@ -767,13 -766,13 +767,13 @@@ struct dentry *mount_bdev(struct file_s
  {
        struct block_device *bdev;
        struct super_block *s;
-       fmode_t mode = FMODE_READ;
+       fmode_t mode = FMODE_READ | FMODE_EXCL;
        int error = 0;
  
        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;
  
-       bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+       bdev = blkdev_get_by_path(dev_name, mode, fs_type);
        if (IS_ERR(bdev))
                return ERR_CAST(bdev);
  
  
                /*
                 * s_umount nests inside bd_mutex during
-                * __invalidate_device().  close_bdev_exclusive()
-                * acquires bd_mutex and can't be called under
-                * s_umount.  Drop s_umount temporarily.  This is safe
-                * as we're holding an active reference.
+                * __invalidate_device().  blkdev_put() acquires
+                * bd_mutex and can't be called under s_umount.  Drop
+                * s_umount temporarily.  This is safe as we're
+                * holding an active reference.
                 */
                up_write(&s->s_umount);
-               close_bdev_exclusive(bdev, mode);
+               blkdev_put(bdev, mode);
                down_write(&s->s_umount);
        } else {
                char b[BDEVNAME_SIZE];
  error_s:
        error = PTR_ERR(s);
  error_bdev:
-       close_bdev_exclusive(bdev, mode);
+       blkdev_put(bdev, mode);
  error:
        return ERR_PTR(error);
  }
@@@ -863,7 -862,8 +863,8 @@@ void kill_block_super(struct super_bloc
        bdev->bd_super = NULL;
        generic_shutdown_super(sb);
        sync_blockdev(bdev);
-       close_bdev_exclusive(bdev, mode);
+       WARN_ON_ONCE(!(mode & FMODE_EXCL));
+       blkdev_put(bdev, mode | FMODE_EXCL);
  }
  
  EXPORT_SYMBOL(kill_block_super);
@@@ -1140,7 -1140,7 +1141,7 @@@ static struct vfsmount *fs_set_subtype(
        return mnt;
  
   err:
 -      mntput(mnt);
 +      mntput_long(mnt);
        return ERR_PTR(err);
  }
  
@@@ -606,7 -606,8 +606,8 @@@ xfs_blkdev_get
  {
        int                     error = 0;
  
-       *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
+       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+                                   mp);
        if (IS_ERR(*bdevp)) {
                error = PTR_ERR(*bdevp);
                printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@@ -620,7 -621,7 +621,7 @@@ xfs_blkdev_put
        struct block_device     *bdev)
  {
        if (bdev)
-               close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
+               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  }
  
  /*
@@@ -834,11 -835,8 +835,11 @@@ xfsaild_wakeup
        struct xfs_ail          *ailp,
        xfs_lsn_t               threshold_lsn)
  {
 -      ailp->xa_target = threshold_lsn;
 -      wake_up_process(ailp->xa_task);
 +      /* only ever move the target forwards */
 +      if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
 +              ailp->xa_target = threshold_lsn;
 +              wake_up_process(ailp->xa_task);
 +      }
  }
  
  STATIC int
@@@ -850,17 -848,8 +851,17 @@@ xfsaild
        long            tout = 0; /* milliseconds */
  
        while (!kthread_should_stop()) {
 -              schedule_timeout_interruptible(tout ?
 -                              msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
 +              /*
 +               * for short sleeps indicating congestion, don't allow us to
 +               * get woken early. Otherwise all we do is bang on the AIL lock
 +               * without making progress.
 +               */
 +              if (tout && tout <= 20)
 +                      __set_current_state(TASK_KILLABLE);
 +              else
 +                      __set_current_state(TASK_INTERRUPTIBLE);
 +              schedule_timeout(tout ?
 +                               msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
  
                /* swsusp */
                try_to_freeze();
@@@ -947,7 -936,7 +948,7 @@@ out_reclaim
   * Slab object creation initialisation for the XFS inode.
   * This covers only the idempotent fields in the XFS inode;
   * all other fields need to be initialised on allocation
 - * from the slab. This avoids the need to repeatedly intialise
 + * from the slab. This avoids the need to repeatedly initialise
   * fields in the xfs inode that left in the initialise state
   * when freeing the inode.
   */
@@@ -1130,8 -1119,6 +1131,8 @@@ xfs_fs_evict_inode
         */
        ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
        mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
 +      lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
 +                      &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
  
        xfs_inactive(ip);
  }
diff --combined include/linux/blkdev.h
@@@ -115,6 -115,7 +115,7 @@@ struct request 
        void *elevator_private3;
  
        struct gendisk *rq_disk;
+       struct hd_struct *part;
        unsigned long start_time;
  #ifdef CONFIG_BLK_CGROUP
        unsigned long long start_time_ns;
@@@ -250,7 -251,7 +251,7 @@@ struct queue_limits 
  
        unsigned char           misaligned;
        unsigned char           discard_misaligned;
 -      unsigned char           no_cluster;
 +      unsigned char           cluster;
        signed char             discard_zeroes_data;
  };
  
@@@ -380,6 -381,7 +381,6 @@@ struct request_queu
  #endif
  };
  
 -#define QUEUE_FLAG_CLUSTER    0       /* cluster several segments into 1 */
  #define QUEUE_FLAG_QUEUED     1       /* uses generic tag queueing */
  #define QUEUE_FLAG_STOPPED    2       /* queue is stopped */
  #define       QUEUE_FLAG_SYNCFULL     3       /* read queue has been filled */
  #define QUEUE_FLAG_SECDISCARD  19     /* supports SECDISCARD */
  
  #define QUEUE_FLAG_DEFAULT    ((1 << QUEUE_FLAG_IO_STAT) |            \
 -                               (1 << QUEUE_FLAG_CLUSTER) |            \
                                 (1 << QUEUE_FLAG_STACKABLE)    |       \
                                 (1 << QUEUE_FLAG_SAME_COMP)    |       \
                                 (1 << QUEUE_FLAG_ADD_RANDOM))
@@@ -508,11 -511,6 +509,11 @@@ static inline void queue_flag_clear(uns
  
  #define rq_data_dir(rq)               ((rq)->cmd_flags & 1)
  
 +static inline unsigned int blk_queue_cluster(struct request_queue *q)
 +{
 +      return q->limits.cluster;
 +}
 +
  /*
   * We regard a request as sync, if either a read or a sync write
   */
@@@ -646,7 -644,6 +647,6 @@@ static inline void rq_flush_dcache_page
  
  extern int blk_register_queue(struct gendisk *disk);
  extern void blk_unregister_queue(struct gendisk *disk);
- extern void register_disk(struct gendisk *dev);
  extern void generic_make_request(struct bio *bio);
  extern void blk_rq_init(struct request_queue *q, struct request *rq);
  extern void blk_put_request(struct request *);
@@@ -808,7 -805,6 +808,7 @@@ extern struct request_queue *blk_init_a
  extern void blk_cleanup_queue(struct request_queue *);
  extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
  extern void blk_queue_bounce_limit(struct request_queue *, u64);
 +extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
  extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
  extern void blk_queue_max_segments(struct request_queue *, unsigned short);
  extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
@@@ -1256,6 -1252,9 +1256,9 @@@ struct block_device_operations 
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*direct_access) (struct block_device *, sector_t,
                                                void **, unsigned long *);
+       unsigned int (*check_events) (struct gendisk *disk,
+                                     unsigned int clearing);
+       /* ->media_changed() is DEPRECATED, use ->check_events() instead */
        int (*media_changed) (struct gendisk *);
        void (*unlock_native_capacity) (struct gendisk *);
        int (*revalidate_disk) (struct gendisk *);
diff --combined include/linux/fs.h
@@@ -34,9 -34,9 +34,9 @@@
  #define SEEK_MAX      SEEK_END
  
  struct fstrim_range {
 -      uint64_t start;
 -      uint64_t len;
 -      uint64_t minlen;
 +      __u64 start;
 +      __u64 len;
 +      __u64 minlen;
  };
  
  /* And dynamically-tunable limits and defaults: */
@@@ -382,6 -382,7 +382,6 @@@ struct inodes_stat_t 
  #include <linux/path.h>
  #include <linux/stat.h>
  #include <linux/cache.h>
 -#include <linux/kobject.h>
  #include <linux/list.h>
  #include <linux/radix-tree.h>
  #include <linux/prio_tree.h>
  #include <linux/capability.h>
  #include <linux/semaphore.h>
  #include <linux/fiemap.h>
 +#include <linux/rculist_bl.h>
  
  #include <asm/atomic.h>
  #include <asm/byteorder.h>
@@@ -401,7 -401,6 +401,7 @@@ struct hd_geometry
  struct iovec;
  struct nameidata;
  struct kiocb;
 +struct kobject;
  struct pipe_inode_info;
  struct poll_table_struct;
  struct kstatfs;
@@@ -603,7 -602,6 +603,7 @@@ struct address_space_operations 
        sector_t (*bmap)(struct address_space *, sector_t);
        void (*invalidatepage) (struct page *, unsigned long);
        int (*releasepage) (struct page *, gfp_t);
 +      void (*freepage)(struct page *);
        ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
                        loff_t offset, unsigned long nr_segs);
        int (*get_xip_mem)(struct address_space *, pgoff_t, int,
@@@ -664,8 -662,9 +664,9 @@@ struct block_device 
        void *                  bd_claiming;
        void *                  bd_holder;
        int                     bd_holders;
+       bool                    bd_write_holder;
  #ifdef CONFIG_SYSFS
-       struct list_head        bd_holder_list;
+       struct gendisk *        bd_holder_disk; /* for sysfs slave linking */
  #endif
        struct block_device *   bd_contains;
        unsigned                bd_block_size;
@@@ -734,31 -733,16 +735,31 @@@ struct posix_acl
  #define ACL_NOT_CACHED ((void *)(-1))
  
  struct inode {
 +      /* RCU path lookup touches following: */
 +      umode_t                 i_mode;
 +      uid_t                   i_uid;
 +      gid_t                   i_gid;
 +      const struct inode_operations   *i_op;
 +      struct super_block      *i_sb;
 +
 +      spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
 +      unsigned int            i_flags;
 +      struct mutex            i_mutex;
 +
 +      unsigned long           i_state;
 +      unsigned long           dirtied_when;   /* jiffies of first dirtying */
 +
        struct hlist_node       i_hash;
        struct list_head        i_wb_list;      /* backing dev IO list */
        struct list_head        i_lru;          /* inode LRU list */
        struct list_head        i_sb_list;
 -      struct list_head        i_dentry;
 +      union {
 +              struct list_head        i_dentry;
 +              struct rcu_head         i_rcu;
 +      };
        unsigned long           i_ino;
        atomic_t                i_count;
        unsigned int            i_nlink;
 -      uid_t                   i_uid;
 -      gid_t                   i_gid;
        dev_t                   i_rdev;
        unsigned int            i_blkbits;
        u64                     i_version;
        struct timespec         i_ctime;
        blkcnt_t                i_blocks;
        unsigned short          i_bytes;
 -      umode_t                 i_mode;
 -      spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
 -      struct mutex            i_mutex;
        struct rw_semaphore     i_alloc_sem;
 -      const struct inode_operations   *i_op;
        const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
 -      struct super_block      *i_sb;
        struct file_lock        *i_flock;
        struct address_space    *i_mapping;
        struct address_space    i_data;
        struct hlist_head       i_fsnotify_marks;
  #endif
  
 -      unsigned long           i_state;
 -      unsigned long           dirtied_when;   /* jiffies of first dirtying */
 -
 -      unsigned int            i_flags;
 -
  #ifdef CONFIG_IMA
        /* protected by i_lock */
        unsigned int            i_readcount; /* struct files open RO */
@@@ -1378,13 -1372,13 +1379,13 @@@ struct super_block 
        const struct xattr_handler **s_xattr;
  
        struct list_head        s_inodes;       /* all inodes */
 -      struct hlist_head       s_anon;         /* anonymous dentries for (nfs) exporting */
 +      struct hlist_bl_head    s_anon;         /* anonymous dentries for (nfs) exporting */
  #ifdef CONFIG_SMP
        struct list_head __percpu *s_files;
  #else
        struct list_head        s_files;
  #endif
 -      /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
 +      /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
        struct list_head        s_dentry_lru;   /* unused dentry lru */
        int                     s_nr_dentry_unused;     /* # of dentry on lru */
  
         * generic_show_options()
         */
        char __rcu *s_options;
 +      const struct dentry_operations *s_d_op; /* default d_op for dentries */
  };
  
  extern struct timespec current_fs_time(struct super_block *sb);
@@@ -1552,18 -1545,9 +1553,18 @@@ struct file_operations 
        int (*setlease)(struct file *, long, struct file_lock **);
  };
  
 +#define IPERM_FLAG_RCU        0x0001
 +
  struct inode_operations {
 -      int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
        struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
 +      void * (*follow_link) (struct dentry *, struct nameidata *);
 +      int (*permission) (struct inode *, int, unsigned int);
 +      int (*check_acl)(struct inode *, int, unsigned int);
 +
 +      int (*readlink) (struct dentry *, char __user *,int);
 +      void (*put_link) (struct dentry *, struct nameidata *, void *);
 +
 +      int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
        int (*unlink) (struct inode *,struct dentry *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
        int (*mknod) (struct inode *,struct dentry *,int,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
 -      int (*readlink) (struct dentry *, char __user *,int);
 -      void * (*follow_link) (struct dentry *, struct nameidata *);
 -      void (*put_link) (struct dentry *, struct nameidata *, void *);
        void (*truncate) (struct inode *);
 -      int (*permission) (struct inode *, int);
 -      int (*check_acl)(struct inode *, int);
        int (*setattr) (struct dentry *, struct iattr *);
        int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
                          loff_t len);
        int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
                      u64 len);
 -};
 +} ____cacheline_aligned;
  
  struct seq_file;
  
@@@ -1624,6 -1613,7 +1625,6 @@@ struct super_operations 
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
  #endif
        int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
 -      int (*trim_fs) (struct super_block *, struct fstrim_range *);
  };
  
  /*
@@@ -1835,9 -1825,7 +1836,9 @@@ struct super_block *sget(struct file_sy
                        int (*set)(struct super_block *,void *),
                        void *data);
  extern struct dentry *mount_pseudo(struct file_system_type *, char *,
 -      const struct super_operations *ops, unsigned long);
 +      const struct super_operations *ops,
 +      const struct dentry_operations *dops,
 +      unsigned long);
  extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
  
  static inline void sb_mark_dirty(struct super_block *sb)
@@@ -2019,7 -2007,6 +2020,6 @@@ extern struct block_device *bdgrab(stru
  extern void bd_set_size(struct block_device *, loff_t size);
  extern void bd_forget(struct inode *inode);
  extern void bdput(struct block_device *);
- extern struct block_device *open_by_devnum(dev_t, fmode_t);
  extern void invalidate_bdev(struct block_device *);
  extern int sync_blockdev(struct block_device *bdev);
  extern struct super_block *freeze_bdev(struct block_device *);
@@@ -2050,16 -2037,20 +2050,20 @@@ extern const struct file_operations def
  extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
  extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
  extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
- extern int blkdev_get(struct block_device *, fmode_t);
- extern int blkdev_put(struct block_device *, fmode_t);
- extern int bd_claim(struct block_device *, void *);
- extern void bd_release(struct block_device *);
+ extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+                                              void *holder);
+ extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
+                                             void *holder);
+ extern int blkdev_put(struct block_device *bdev, fmode_t mode);
  #ifdef CONFIG_SYSFS
- extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
- extern void bd_release_from_disk(struct block_device *, struct gendisk *);
+ extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
  #else
- #define bd_claim_by_disk(bdev, holder, disk)  bd_claim(bdev, holder)
- #define bd_release_from_disk(bdev, disk)      bd_release(bdev)
+ static inline int bd_link_disk_holder(struct block_device *bdev,
+                                     struct gendisk *disk)
+ {
+       return 0;
+ }
  #endif
  #endif
  
@@@ -2095,8 -2086,6 +2099,6 @@@ static inline void unregister_chrdev(un
  extern const char *__bdevname(dev_t, char *buffer);
  extern const char *bdevname(struct block_device *bdev, char *buffer);
  extern struct block_device *lookup_bdev(const char *);
- extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
- extern void close_bdev_exclusive(struct block_device *, fmode_t);
  extern void blkdev_show(struct seq_file *,off_t);
  
  #else
@@@ -2171,8 -2160,8 +2173,8 @@@ extern sector_t bmap(struct inode *, se
  #endif
  extern int notify_change(struct dentry *, struct iattr *);
  extern int inode_permission(struct inode *, int);
 -extern int generic_permission(struct inode *, int,
 -              int (*check_acl)(struct inode *, int));
 +extern int generic_permission(struct inode *, int, unsigned int,
 +              int (*check_acl)(struct inode *, int, unsigned int));
  
  static inline bool execute_ok(struct inode *inode)
  {
@@@ -2243,7 -2232,6 +2245,7 @@@ extern void iget_failed(struct inode *)
  extern void end_writeback(struct inode *);
  extern void __destroy_inode(struct inode *);
  extern struct inode *new_inode(struct super_block *);
 +extern void free_inode_nonrcu(struct inode *inode);
  extern int should_remove_suid(struct dentry *);
  extern int file_remove_suid(struct file *);
  
@@@ -2460,10 -2448,6 +2462,10 @@@ static inline ino_t parent_ino(struct d
  {
        ino_t res;
  
 +      /*
 +       * Don't strictly need d_lock here? If the parent ino could change
 +       * then surely we'd have a deeper race in the caller?
 +       */
        spin_lock(&dentry->d_lock);
        res = dentry->d_parent->d_inode->i_ino;
        spin_unlock(&dentry->d_lock);
diff --combined include/scsi/scsi.h
@@@ -104,6 -104,7 +104,7 @@@ struct scsi_cmnd
  #define UNMAP               0x42
  #define READ_TOC              0x43
  #define READ_HEADER           0x44
+ #define GET_EVENT_STATUS_NOTIFICATION 0x4a
  #define LOG_SELECT            0x4c
  #define LOG_SENSE             0x4d
  #define XDWRITEREAD_10        0x53
  #define PERSISTENT_RESERVE_OUT 0x5f
  #define VARIABLE_LENGTH_CMD   0x7f
  #define REPORT_LUNS           0xa0
 +#define SECURITY_PROTOCOL_IN  0xa2
  #define MAINTENANCE_IN        0xa3
  #define MAINTENANCE_OUT       0xa4
  #define MOVE_MEDIUM           0xa5
  #define EXCHANGE_MEDIUM       0xa6
  #define READ_12               0xa8
  #define WRITE_12              0xaa
 +#define READ_MEDIA_SERIAL_NUMBER 0xab
  #define WRITE_VERIFY_12       0xae
  #define VERIFY_12           0xaf
  #define SEARCH_HIGH_12        0xb0
  #define SEARCH_EQUAL_12       0xb1
  #define SEARCH_LOW_12         0xb2
 +#define SECURITY_PROTOCOL_OUT 0xb5
  #define READ_ELEMENT_STATUS   0xb8
  #define SEND_VOLUME_TAG       0xb6
  #define WRITE_LONG_2          0xea
 +#define EXTENDED_COPY         0x83
 +#define RECEIVE_COPY_RESULTS  0x84
 +#define ACCESS_CONTROL_IN     0x86
 +#define ACCESS_CONTROL_OUT    0x87
  #define READ_16               0x88
  #define WRITE_16              0x8a
 +#define READ_ATTRIBUTE        0x8c
 +#define WRITE_ATTRIBUTE             0x8d
  #define VERIFY_16           0x8f
  #define WRITE_SAME_16       0x93
  #define SERVICE_ACTION_IN     0x9e
  /* values for service action in */
  #define       SAI_READ_CAPACITY_16  0x10
  #define SAI_GET_LBA_STATUS    0x12
 +/* values for VARIABLE_LENGTH_CMD service action codes
 + * see spc4r17 Section D.3.5, table D.7 and D.8 */
 +#define VLC_SA_RECEIVE_CREDENTIAL 0x1800
  /* values for maintenance in */
 +#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
  #define MI_REPORT_TARGET_PGS  0x0a
 +#define MI_REPORT_ALIASES     0x0b
 +#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c
 +#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d
 +#define MI_REPORT_PRIORITY   0x0e
 +#define MI_REPORT_TIMESTAMP  0x0f
 +#define MI_MANAGEMENT_PROTOCOL_IN 0x10
  /* values for maintenance out */
 +#define MO_SET_IDENTIFYING_INFORMATION 0x06
  #define MO_SET_TARGET_PGS     0x0a
 +#define MO_CHANGE_ALIASES     0x0b
 +#define MO_SET_PRIORITY       0x0e
 +#define MO_SET_TIMESTAMP      0x0f
 +#define MO_MANAGEMENT_PROTOCOL_OUT 0x10
  /* values for variable length command */
 +#define XDREAD_32           0x03
 +#define XDWRITE_32          0x04
 +#define XPWRITE_32          0x06
 +#define XDWRITEREAD_32              0x07
  #define READ_32                     0x09
  #define VERIFY_32           0x0a
  #define WRITE_32            0x0b
diff --combined kernel/power/swap.c
@@@ -6,7 -6,6 +6,7 @@@
   *
   * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
   * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 + * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>
   *
   * This file is released under the GPLv2.
   *
@@@ -30,7 -29,7 +30,7 @@@
  
  #include "power.h"
  
 -#define HIBERNATE_SIG "LINHIB0001"
 +#define HIBERNATE_SIG "S1SUSPEND"
  
  /*
   *    The swap map is a data structure used for keeping track of each page
@@@ -224,7 -223,7 +224,7 @@@ static int swsusp_swap_check(void
                return res;
  
        root_swap = res;
-       res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
+       res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
        if (res)
                return res;
  
@@@ -754,43 -753,30 +754,43 @@@ static int load_image_lzo(struct swap_m
  {
        unsigned int m;
        int error = 0;
 +      struct bio *bio;
        struct timeval start;
        struct timeval stop;
        unsigned nr_pages;
 -      size_t off, unc_len, cmp_len;
 -      unsigned char *unc, *cmp, *page;
 +      size_t i, off, unc_len, cmp_len;
 +      unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
  
 -      page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
 -      if (!page) {
 -              printk(KERN_ERR "PM: Failed to allocate LZO page\n");
 -              return -ENOMEM;
 +      for (i = 0; i < LZO_CMP_PAGES; i++) {
 +              page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
 +              if (!page[i]) {
 +                      printk(KERN_ERR "PM: Failed to allocate LZO page\n");
 +
 +                      while (i)
 +                              free_page((unsigned long)page[--i]);
 +
 +                      return -ENOMEM;
 +              }
        }
  
        unc = vmalloc(LZO_UNC_SIZE);
        if (!unc) {
                printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
 -              free_page((unsigned long)page);
 +
 +              for (i = 0; i < LZO_CMP_PAGES; i++)
 +                      free_page((unsigned long)page[i]);
 +
                return -ENOMEM;
        }
  
        cmp = vmalloc(LZO_CMP_SIZE);
        if (!cmp) {
                printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
 +
                vfree(unc);
 -              free_page((unsigned long)page);
 +              for (i = 0; i < LZO_CMP_PAGES; i++)
 +                      free_page((unsigned long)page[i]);
 +
                return -ENOMEM;
        }
  
        if (!m)
                m = 1;
        nr_pages = 0;
 +      bio = NULL;
        do_gettimeofday(&start);
  
        error = snapshot_write_next(snapshot);
                goto out_finish;
  
        for (;;) {
 -              error = swap_read_page(handle, page, NULL); /* sync */
 +              error = swap_read_page(handle, page[0], NULL); /* sync */
                if (error)
                        break;
  
 -              cmp_len = *(size_t *)page;
 +              cmp_len = *(size_t *)page[0];
                if (unlikely(!cmp_len ||
                             cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
                        printk(KERN_ERR "PM: Invalid LZO compressed length\n");
                        break;
                }
  
 -              memcpy(cmp, page, PAGE_SIZE);
 -              for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
 -                      error = swap_read_page(handle, page, NULL); /* sync */
 +              for (off = PAGE_SIZE, i = 1;
 +                   off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
 +                      error = swap_read_page(handle, page[i], &bio);
                        if (error)
                                goto out_finish;
 +              }
  
 -                      memcpy(cmp + off, page, PAGE_SIZE);
 +              error = hib_wait_on_bio_chain(&bio); /* need all data now */
 +              if (error)
 +                      goto out_finish;
 +
 +              for (off = 0, i = 0;
 +                   off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
 +                      memcpy(cmp + off, page[i], PAGE_SIZE);
                }
  
                unc_len = LZO_UNC_SIZE;
@@@ -879,8 -857,7 +879,8 @@@ out_finish
  
        vfree(cmp);
        vfree(unc);
 -      free_page((unsigned long)page);
 +      for (i = 0; i < LZO_CMP_PAGES; i++)
 +              free_page((unsigned long)page[i]);
  
        return error;
  }
  /**
   *    swsusp_read - read the hibernation image.
   *    @flags_p: flags passed by the "frozen" kernel in the image header should
 - *              be written into this memeory location
 + *              be written into this memory location
   */
  
  int swsusp_read(unsigned int *flags_p)
@@@ -930,7 -907,8 +930,8 @@@ int swsusp_check(void
  {
        int error;
  
-       hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+       hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+                                           FMODE_READ, NULL);
        if (!IS_ERR(hib_resume_bdev)) {
                set_blocksize(hib_resume_bdev, PAGE_SIZE);
                clear_page(swsusp_header);