static DEFINE_MUTEX(blkcg_pol_mutex);
-struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
- .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
+struct blkcg blkcg_root;
EXPORT_SYMBOL_GPL(blkcg_root);
+struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
+
static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
static bool blkcg_policy_enabled(struct request_queue *q,
spin_lock_init(&blkcg->lock);
INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
INIT_HLIST_HEAD(&blkcg->blkg_list);
-
+#ifdef CONFIG_CGROUP_WRITEBACK
+ INIT_LIST_HEAD(&blkcg->cgwb_list);
+#endif
return &blkcg->css;
+
+free_pd_blkcg:
+ for (i--; i >= 0; i--)
+ kfree(blkcg->pd[i]);
+
+free_blkcg:
+ kfree(blkcg);
+ return ret;
}
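
A minimal userspace sketch of the unwind pattern the new labels give blkcg_css_alloc(): when an allocation fails partway through filling pd[], walk the already-filled slots backwards, free them, then free the container itself. All names below are invented for illustration; only the shape matches the kernel code above.

#include <stdlib.h>

#define MAX_POLS 5

struct container {
	void *pd[MAX_POLS];
};

static struct container *container_alloc(void)
{
	struct container *c = calloc(1, sizeof(*c));
	int i;

	if (!c)
		return NULL;

	for (i = 0; i < MAX_POLS; i++) {
		c->pd[i] = malloc(64);
		if (!c->pd[i])
			goto free_pd;	/* unwind whatever is filled so far */
	}
	return c;

free_pd:
	for (i--; i >= 0; i--)	/* same backwards walk as free_pd_blkcg */
		free(c->pd[i]);
	free(c);
	return NULL;
}

int main(void)
{
	struct container *c = container_alloc();
	int i;

	if (c) {
		for (i = 0; i < MAX_POLS; i++)
			free(c->pd[i]);
		free(c);
	}
	return 0;
}
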
/**
const struct blkcg_policy *pol)
{
LIST_HEAD(pds);
- struct blkcg_gq *blkg, *new_blkg;
+ LIST_HEAD(cpds);
- struct blkg_policy_data *pd, *n;
+ struct blkcg_gq *blkg;
+ struct blkg_policy_data *pd, *nd;
+ struct blkcg_policy_data *cpd, *cnd;
int cnt = 0, ret;
- bool preloaded;
if (blkcg_policy_enabled(q, pol))
return 0;
- /* preallocations for root blkg */
- new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
- if (!new_blkg)
- return -ENOMEM;
-
+ /* count and allocate policy_data for all existing blkgs */
blk_queue_bypass_start(q);
-
- preloaded = !radix_tree_preload(GFP_KERNEL);
-
- /*
- * Make sure the root blkg exists and count the existing blkgs. As
- * @q is bypassing at this point, blkg_lookup_create() can't be
- * used. Open code it.
- */
spin_lock_irq(q->queue_lock);
-
- rcu_read_lock();
- blkg = __blkg_lookup(&blkcg_root, q, false);
- if (blkg)
- blkg_free(new_blkg);
- else
- blkg = blkg_create(&blkcg_root, q, new_blkg);
- rcu_read_unlock();
-
- if (preloaded)
- radix_tree_preload_end();
-
- if (IS_ERR(blkg)) {
- ret = PTR_ERR(blkg);
- goto out_unlock;
- }
-
list_for_each_entry(blkg, &q->blkg_list, q_node)
cnt++;
-
spin_unlock_irq(q->queue_lock);
+ /*
+ * Allocate per-blkg and per-blkcg policy data
+ * for all existing blkgs.
+ */
while (cnt--) {
pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
if (!pd) {
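
The hunk above also converts blkcg_activate_policy() to a two-phase scheme: count the existing blkgs under queue_lock, drop the lock, then do the GFP_KERNEL allocations (which may sleep) before retaking the lock to install the results. A pthread-based sketch of that shape, with invented names and the attach step left to the caller:

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; };

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *blkg_list;

int activate(size_t pd_size, void ***pdsp)
{
	struct node *n;
	void **pds;
	int cnt = 0, i;

	/* phase 1: only count while holding the lock */
	pthread_mutex_lock(&q_lock);
	for (n = blkg_list; n; n = n->next)
		cnt++;
	pthread_mutex_unlock(&q_lock);

	/* phase 2: allocate outside the lock, where blocking is fine */
	pds = calloc(cnt + 1, sizeof(*pds));	/* +1: non-NULL even if empty */
	if (!pds)
		return -1;
	for (i = 0; i < cnt; i++) {
		pds[i] = calloc(1, pd_size);
		if (!pds[i]) {
			while (i--)
				free(pds[i]);
			free(pds);
			return -1;
		}
	}
	*pdsp = pds;	/* caller retakes the lock and attaches these */
	return 0;
}
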
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
const char *fmt, ...);
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
-void bdi_unregister(struct backing_dev_info *bdi);
int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
- enum wb_reason reason);
-void bdi_start_background_writeback(struct backing_dev_info *bdi);
-void bdi_writeback_workfn(struct work_struct *work);
-int bdi_has_dirty_io(struct backing_dev_info *bdi);
-void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
+void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
+ bool range_cyclic, enum wb_reason reason);
+void wb_start_background_writeback(struct bdi_writeback *wb);
+void wb_workfn(struct work_struct *work);
+void wb_wakeup_delayed(struct bdi_writeback *wb);
extern spinlock_t bdi_lock;
extern struct list_head bdi_list;
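
The renamed declarations above move writeback kicks from the backing_dev_info as a whole to one of its bdi_writeback structures, and wb_start_writeback() grows an explicit range_cyclic flag. A stub sketch of what a call site sees; the struct, the stub body, and the printf are stand-ins, only the signature comes from the hunk:

#include <stdio.h>

struct bdi_writeback { const char *name; };
enum wb_reason { WB_REASON_SYNC };

/* stub with the new signature; the real one queues work on the wb */
static void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
			       int range_cyclic, enum wb_reason reason)
{
	printf("kick %s: %ld pages, cyclic=%d, reason=%d\n",
	       wb->name, nr_pages, range_cyclic, reason);
}

int main(void)
{
	struct bdi_writeback wb = { .name = "8:0" };

	/* pre-patch shape was bdi_start_writeback(bdi, 1024, WB_REASON_SYNC) */
	wb_start_writeback(&wb, 1024, 1, WB_REASON_SYNC);
	return 0;
}
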
struct blkcg_gq *blkg_hint;
struct hlist_head blkg_list;
- /* TODO: per-policy storage in blkcg */
- unsigned int cfq_weight; /* belongs to cfq */
- unsigned int cfq_leaf_weight;
+ struct blkcg_policy_data *pd[BLKCG_MAX_POLS];
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+ struct list_head cgwb_list;
+#endif
};
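
struct blkcg now carries an opaque per-policy slot array instead of cfq-only weight fields; each policy indexes pd[] with its own policy id. A compilable sketch of that lookup shape, with invented type names and an arbitrary array size:

#include <stdio.h>

#define MAX_POLS 5

struct policy_data { unsigned int weight; };

struct blkcg_sketch {
	struct policy_data *pd[MAX_POLS];
};

/* how a policy reaches its own per-cgroup data by policy id (plid) */
static struct policy_data *cpd_lookup(struct blkcg_sketch *blkcg, int plid)
{
	return blkcg ? blkcg->pd[plid] : NULL;
}

int main(void)
{
	struct policy_data cfq = { .weight = 500 };
	struct blkcg_sketch blkcg = { .pd = { [1] = &cfq } };

	printf("weight=%u\n", cpd_lookup(&blkcg, 1)->weight);
	return 0;
}
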
struct blkg_stat {
extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
struct scsi_ioctl_command __user *);
-/*
- * A queue has just exitted congestion. Note this in the global counter of
- * congested queues, and wake up anyone who was waiting for requests to be
- * put back.
- */
-static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
-{
- clear_bdi_congested(&q->backing_dev_info, sync);
-}
-
-/*
- * A queue has just entered congestion. Flag that in the queue's VM-visible
- * state flags and increment the global gounter of congested queues.
- */
-static inline void blk_set_queue_congested(struct request_queue *q, int sync)
-{
- set_bdi_congested(&q->backing_dev_info, sync);
-}
-extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
-
extern void blk_start_queue(struct request_queue *q);
extern void blk_stop_queue(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);
bdi->min_ratio = 0;
bdi->max_ratio = 100;
bdi->max_prop_frac = FPROP_FRAC_BASE;
- spin_lock_init(&bdi->wb_lock);
INIT_LIST_HEAD(&bdi->bdi_list);
- INIT_LIST_HEAD(&bdi->work_list);
+ init_waitqueue_head(&bdi->wb_waitq);
- bdi_wb_init(&bdi->wb, bdi);
+ err = wb_init(&bdi->wb, bdi, GFP_KERNEL);
+ if (err)
+ return err;
- for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
- err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL);
- if (err)
- goto err;
- }
+ bdi->wb_congested.state = 0;
+ bdi->wb.congested = &bdi->wb_congested;
- bdi->dirty_exceeded = 0;
+ cgwb_bdi_init(bdi);
+ return 0;
+}
+EXPORT_SYMBOL(bdi_init);
- bdi->bw_time_stamp = jiffies;
- bdi->written_stamp = 0;
+int bdi_register(struct backing_dev_info *bdi, struct device *parent,
+ const char *fmt, ...)
+{
+ va_list args;
+ struct device *dev;
- bdi->balanced_dirty_ratelimit = INIT_BW;
- bdi->dirty_ratelimit = INIT_BW;
- bdi->write_bandwidth = INIT_BW;
- bdi->avg_write_bandwidth = INIT_BW;
+ if (bdi->dev) /* The driver needs to use separate queues per device */
+ return 0;
- err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL);
+ va_start(args, fmt);
+ dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
+ va_end(args);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
- if (err) {
-err:
- while (i--)
- percpu_counter_destroy(&bdi->bdi_stat[i]);
- }
+ bdi->dev = dev;
- return err;
+ bdi_debug_register(bdi, dev_name(dev));
+ set_bit(WB_registered, &bdi->wb.state);
+
+ spin_lock_bh(&bdi_lock);
+ list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+ spin_unlock_bh(&bdi_lock);
+
+ trace_writeback_bdi_register(bdi);
+ return 0;
}
-EXPORT_SYMBOL(bdi_init);
+EXPORT_SYMBOL(bdi_register);
-void bdi_destroy(struct backing_dev_info *bdi)
+int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
- int i;
+ return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
+}
+EXPORT_SYMBOL(bdi_register_dev);
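
bdi_register() above forwards its format string and va_list to device_create_vargs(), and bdi_register_dev() is a thin "%u:%u" wrapper over it. The same forwarding shape in plain C, with vsnprintf standing in for the driver-core call and all names invented:

#include <stdarg.h>
#include <stdio.h>

static int name_register(char *buf, size_t len, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = vsnprintf(buf, len, fmt, args);	/* stand-in for device_create_vargs() */
	va_end(args);
	return ret < 0 ? -1 : 0;
}

/* the bdi_register_dev() analogue: fixed major:minor formatting */
static int name_register_dev(char *buf, size_t len, unsigned int major, unsigned int minor)
{
	return name_register(buf, len, "%u:%u", major, minor);
}

int main(void)
{
	char name[32];

	if (!name_register_dev(name, sizeof(name), 8, 0))
		puts(name);	/* prints "8:0" */
	return 0;
}
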
+
+/*
+ * Remove bdi from bdi_list, and ensure that it is no longer visible
+ */
+static void bdi_remove_from_list(struct backing_dev_info *bdi)
+{
+ spin_lock_bh(&bdi_lock);
+ list_del_rcu(&bdi->bdi_list);
+ spin_unlock_bh(&bdi_lock);
- bdi_wb_shutdown(bdi);
- bdi_set_min_ratio(bdi, 0);
+ synchronize_rcu_expedited();
+}
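
bdi_remove_from_list() pairs list_del_rcu() with synchronize_rcu_expedited(): once it returns, no RCU reader can still hold a reference it found on bdi_list, so teardown may proceed. The same sequence sketched with userspace RCU; liburcu is an assumption here (link with -lurcu), the kernel uses its own RCU implementation:

#include <pthread.h>
#include <urcu.h>
#include <urcu/rculist.h>

struct bdi_sketch {
	struct cds_list_head bdi_node;
};

static pthread_mutex_t bdi_lock_sk = PTHREAD_MUTEX_INITIALIZER;
CDS_LIST_HEAD(bdi_list_sk);

void bdi_remove_from_list_sk(struct bdi_sketch *bdi)
{
	pthread_mutex_lock(&bdi_lock_sk);
	cds_list_del_rcu(&bdi->bdi_node);	/* unlink; readers may still see it */
	pthread_mutex_unlock(&bdi_lock_sk);

	synchronize_rcu();	/* wait out every pre-existing RCU read side */
	/* from here no lookup can return bdi; safe to shut down and free */
}
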
- WARN_ON(!list_empty(&bdi->work_list));
- WARN_ON(delayed_work_pending(&bdi->wb.dwork));
-/*
- * Called when the device behind @bdi has been removed or ejected.
- *
- * We can't really do much here except for reducing the dirty ratio at
- * the moment. In the future we should be able to set a flag so that
- * the filesystem can handle errors at mark_inode_dirty time instead
- * of only at writeback time.
- */
-void bdi_unregister(struct backing_dev_info *bdi)
-{
- if (WARN_ON_ONCE(!bdi->dev))
- return;
-
- bdi_set_min_ratio(bdi, 0);
-}
-EXPORT_SYMBOL(bdi_unregister);
-
+void bdi_destroy(struct backing_dev_info *bdi)
+{
+ /* make sure nobody finds us on the bdi_list anymore */
+ bdi_remove_from_list(bdi);
+ wb_shutdown(&bdi->wb);
+ cgwb_bdi_destroy(bdi);
if (bdi->dev) {
bdi_debug_unregister(bdi);
error = radix_tree_insert(&mapping->page_tree, offset, new);
BUG_ON(error);
mapping->nrpages++;
- __inc_zone_page_state(new, NR_FILE_PAGES);
+
+ /*
+ * hugetlb pages do not participate in page cache accounting.
+ */
+ if (!PageHuge(new))
+ __inc_zone_page_state(new, NR_FILE_PAGES);
if (PageSwapBacked(new))
__inc_zone_page_state(new, NR_SHMEM);
- spin_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ mem_cgroup_end_page_stat(memcg);
mem_cgroup_migrate(old, new, true);
radix_tree_preload_end();
if (freepage)
* threshold, so that the occasional writes won't be blocked and active
* writes can rampup the threshold quickly.
*/
- bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
+ wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
/*
- * scale global setpoint to bdi's:
- * bdi_setpoint = setpoint * bdi_thresh / thresh
+ * scale global setpoint to wb's:
+ * wb_setpoint = setpoint * wb_thresh / thresh
*/
- x = div_u64((u64)bdi_thresh << 16, thresh | 1);
- bdi_setpoint = setpoint * (u64)x >> 16;
+ x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
+ wb_setpoint = setpoint * (u64)x >> 16;
/*
- * Use span=(8*write_bw) in single bdi case as indicated by
- * (thresh - bdi_thresh ~= 0) and transit to bdi_thresh in JBOD case.
+ * Use span=(8*write_bw) in single wb case as indicated by
+ * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
*
- * bdi_thresh thresh - bdi_thresh
- * span = ---------- * (8 * write_bw) + ------------------- * bdi_thresh
- * thresh thresh
+ * wb_thresh thresh - wb_thresh
+ * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
+ * thresh thresh
*/
- span = (thresh - bdi_thresh + 8 * write_bw) * (u64)x >> 16;
- x_intercept = bdi_setpoint + span;
+ span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
+ x_intercept = wb_setpoint + span;
- if (bdi_dirty < x_intercept - span / 4) {
- pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
- (x_intercept - bdi_setpoint) | 1);
+ if (dtc->wb_dirty < x_intercept - span / 4) {
+ pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
+ (x_intercept - wb_setpoint) | 1);
} else
pos_ratio /= 4;
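
To make the wb_setpoint/span arithmetic above concrete: a worked instance with made-up numbers (thresh = 1000, wb_thresh = 250, setpoint = 800, write_bw = 100), using the same 16.16 fixed-point scaling and the same "| 1" divide-by-zero guard as the hunk:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t thresh = 1000, wb_thresh = 250;
	uint64_t setpoint = 800, write_bw = 100;

	/* x = wb_thresh / thresh in 16.16 fixed point */
	uint64_t x = (wb_thresh << 16) / (thresh | 1);
	uint64_t wb_setpoint = (setpoint * x) >> 16;
	uint64_t span = ((thresh - wb_thresh + 8 * write_bw) * x) >> 16;
	uint64_t x_intercept = wb_setpoint + span;

	/*
	 * Prints wb_setpoint=199 span=387 x_intercept=586: a wb that owns a
	 * quarter of the global threshold gets its setpoint scaled to about
	 * a quarter of the global one, and pos_ratio decays to zero as
	 * wb_dirty approaches x_intercept.
	 */
	printf("wb_setpoint=%llu span=%llu x_intercept=%llu\n",
	       (unsigned long long)wb_setpoint,
	       (unsigned long long)span,
	       (unsigned long long)x_intercept);
	return 0;
}
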