blkcg: remove unused @pol and @plid parameters
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4572952..8cca616 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -17,6 +17,8 @@
 #include "blk.h"
 #include "cfq.h"
 
+static struct blkio_policy_type blkio_policy_cfq;
+
 /*
  * tunables
  */
@@ -206,11 +208,7 @@ struct cfq_group {
        unsigned long saved_workload_slice;
        enum wl_type_t saved_workload;
        enum wl_prio_t saved_serving_prio;
-       struct blkio_group blkg;
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-       struct hlist_node cfqd_node;
-       int ref;
-#endif
+
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
        struct cfq_ttime ttime;
@@ -220,6 +218,10 @@ struct cfq_io_cq {
        struct io_cq            icq;            /* must be the first member */
        struct cfq_queue        *cfqq[2];
        struct cfq_ttime        ttime;
+       int                     ioprio;         /* the current ioprio */
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+       uint64_t                blkcg_id;       /* the current blkcg ID */
+#endif
 };
 
 /*
@@ -229,7 +231,7 @@ struct cfq_data {
        struct request_queue *queue;
        /* Root service tree for cfq_groups */
        struct cfq_rb_root grp_service_tree;
-       struct cfq_group root_group;
+       struct cfq_group *root_group;
 
        /*
         * The priority currently being served
@@ -302,12 +304,6 @@ struct cfq_data {
        struct cfq_queue oom_cfqq;
 
        unsigned long last_delayed_sync;
-
-       /* List of cfq groups being managed on this device*/
-       struct hlist_head cfqg_list;
-
-       /* Number of groups which are on blkcg->blkg_list */
-       unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -371,20 +367,48 @@ CFQ_CFQQ_FNS(wait_busy);
 #undef CFQ_CFQQ_FNS
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
+static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg)
+{
+       return blkg_to_pdata(blkg, &blkio_policy_cfq);
+}
+
+static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg)
+{
+       return pdata_to_blkg(cfqg);
+}
+
+static inline void cfqg_get(struct cfq_group *cfqg)
+{
+       return blkg_get(cfqg_to_blkg(cfqg));
+}
+
+static inline void cfqg_put(struct cfq_group *cfqg)
+{
+       return blkg_put(cfqg_to_blkg(cfqg));
+}
+
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
        blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
                        cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
-                       blkg_path(&(cfqq)->cfqg->blkg), ##args)
+                       blkg_path(cfqg_to_blkg((cfqq)->cfqg)), ##args)
 
 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)                         \
        blk_add_trace_msg((cfqd)->queue, "%s " fmt,                     \
-                               blkg_path(&(cfqg)->blkg), ##args)       \
+                       blkg_path(cfqg_to_blkg((cfqg))), ##args)        \
+
+#else  /* CONFIG_CFQ_GROUP_IOSCHED */
+
+static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) { return NULL; }
+static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) { return NULL; }
+static inline void cfqg_get(struct cfq_group *cfqg) { }
+static inline void cfqg_put(struct cfq_group *cfqg) { }
 
-#else
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
        blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)         do {} while (0)
-#endif
+
+#endif /* CONFIG_CFQ_GROUP_IOSCHED */
+
 #define cfq_log(cfqd, fmt, args...)    \
        blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
 
@@ -465,8 +489,9 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
 }
 
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
-static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
-                                      struct io_context *, gfp_t);
+static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync,
+                                      struct cfq_io_cq *cic, struct bio *bio,
+                                      gfp_t gfp_mask);
 
 static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
 {
@@ -935,7 +960,8 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
        cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
        cfq_group_service_tree_del(st, cfqg);
        cfqg->saved_workload_slice = 0;
-       cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
+       cfq_blkiocg_update_dequeue_stats(cfqg_to_blkg(cfqg),
+                                        &blkio_policy_cfq, 1);
 }
 
 static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
@@ -1007,178 +1033,70 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
                     "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
                     used_sl, cfqq->slice_dispatch, charge,
                     iops_mode(cfqd), cfqq->nr_sectors);
-       cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
-                                         unaccounted_sl);
-       cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
-}
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
-{
-       if (blkg)
-               return container_of(blkg, struct cfq_group, blkg);
-       return NULL;
-}
-
-static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
-                                         unsigned int weight)
-{
-       struct cfq_group *cfqg = cfqg_of_blkg(blkg);
-       cfqg->new_weight = weight;
-       cfqg->needs_update = true;
+       cfq_blkiocg_update_timeslice_used(cfqg_to_blkg(cfqg), &blkio_policy_cfq,
+                                         used_sl, unaccounted_sl);
+       cfq_blkiocg_set_start_empty_time(cfqg_to_blkg(cfqg), &blkio_policy_cfq);
 }
 
-static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
-                       struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
-{
-       struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
-       unsigned int major, minor;
-
-       /*
-        * Add group onto cgroup list. It might happen that bdi->dev is
-        * not initialized yet. Initialize this new group without major
-        * and minor info and this info will be filled in once a new thread
-        * comes for IO.
-        */
-       if (bdi->dev) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-                                       (void *)cfqd, MKDEV(major, minor));
-       } else
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-                                       (void *)cfqd, 0);
-
-       cfqd->nr_blkcg_linked_grps++;
-       cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
-
-       /* Add group on cfqd list */
-       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-}
-
-/*
- * Should be called from sleepable context. No request queue lock as per
- * cpu stats are allocated dynamically and alloc_percpu needs to be called
- * from sleepable context.
+/**
+ * cfq_init_cfqg_base - initialize base part of a cfq_group
+ * @cfqg: cfq_group to initialize
+ *
+ * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED
+ * is enabled or not.
  */
-static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+static void cfq_init_cfqg_base(struct cfq_group *cfqg)
 {
-       struct cfq_group *cfqg = NULL;
-       int i, j, ret;
        struct cfq_rb_root *st;
-
-       cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
-       if (!cfqg)
-               return NULL;
+       int i, j;
 
        for_each_cfqg_st(cfqg, i, j, st)
                *st = CFQ_RB_ROOT;
        RB_CLEAR_NODE(&cfqg->rb_node);
 
        cfqg->ttime.last_end_request = jiffies;
-
-       /*
-        * Take the initial reference that will be released on destroy
-        * This can be thought of a joint reference by cgroup and
-        * elevator which will be dropped by either elevator exit
-        * or cgroup deletion path depending on who is exiting first.
-        */
-       cfqg->ref = 1;
-
-       ret = blkio_alloc_blkg_stats(&cfqg->blkg);
-       if (ret) {
-               kfree(cfqg);
-               return NULL;
-       }
-
-       return cfqg;
 }
 
-static struct cfq_group *
-cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static void cfq_update_blkio_group_weight(struct request_queue *q,
+                                         struct blkio_group *blkg,
+                                         unsigned int weight)
 {
-       struct cfq_group *cfqg = NULL;
-       void *key = cfqd;
-       struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
-       unsigned int major, minor;
+       struct cfq_group *cfqg = blkg_to_cfqg(blkg);
 
-       /*
-        * This is the common case when there are no blkio cgroups.
-        * Avoid lookup in this case
-        */
-       if (blkcg == &blkio_root_cgroup)
-               cfqg = &cfqd->root_group;
-       else
-               cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+       cfqg->new_weight = weight;
+       cfqg->needs_update = true;
+}
 
-       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfqg->blkg.dev = MKDEV(major, minor);
-       }
+static void cfq_init_blkio_group(struct blkio_group *blkg)
+{
+       struct cfq_group *cfqg = blkg_to_cfqg(blkg);
 
-       return cfqg;
+       cfq_init_cfqg_base(cfqg);
+       cfqg->weight = blkg->blkcg->weight;
 }
 
 /*
  * Search for the cfq group current task belongs to. request_queue lock must
  * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+                                               struct blkio_cgroup *blkcg)
 {
-       struct blkio_cgroup *blkcg;
-       struct cfq_group *cfqg = NULL, *__cfqg = NULL;
        struct request_queue *q = cfqd->queue;
+       struct cfq_group *cfqg = NULL;
 
-       rcu_read_lock();
-       blkcg = task_blkio_cgroup(current);
-       cfqg = cfq_find_cfqg(cfqd, blkcg);
-       if (cfqg) {
-               rcu_read_unlock();
-               return cfqg;
-       }
-
-       /*
-        * Need to allocate a group. Allocation of group also needs allocation
-        * of per cpu stats which in-turn takes a mutex() and can block. Hence
-        * we need to drop rcu lock and queue_lock before we call alloc.
-        *
-        * Not taking any queue reference here and assuming that queue is
-        * around by the time we return. CFQ queue allocation code does
-        * the same. It might be racy though.
-        */
-
-       rcu_read_unlock();
-       spin_unlock_irq(q->queue_lock);
-
-       cfqg = cfq_alloc_cfqg(cfqd);
-
-       spin_lock_irq(q->queue_lock);
-
-       rcu_read_lock();
-       blkcg = task_blkio_cgroup(current);
-
-       /*
-        * If some other thread already allocated the group while we were
-        * not holding queue lock, free up the group
-        */
-       __cfqg = cfq_find_cfqg(cfqd, blkcg);
+       /* avoid lookup for the common case where there's no blkio cgroup */
+       if (blkcg == &blkio_root_cgroup) {
+               cfqg = cfqd->root_group;
+       } else {
+               struct blkio_group *blkg;
 
-       if (__cfqg) {
-               kfree(cfqg);
-               rcu_read_unlock();
-               return __cfqg;
+               blkg = blkg_lookup_create(blkcg, q, false);
+               if (!IS_ERR(blkg))
+                       cfqg = blkg_to_cfqg(blkg);
        }
 
-       if (!cfqg)
-               cfqg = &cfqd->root_group;
-
-       cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
-       rcu_read_unlock();
-       return cfqg;
-}
-
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
-       cfqg->ref++;
        return cfqg;
 }
 
@@ -1186,94 +1104,18 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 {
        /* Currently, all async queues are mapped to root group */
        if (!cfq_cfqq_sync(cfqq))
-               cfqg = &cfqq->cfqd->root_group;
+               cfqg = cfqq->cfqd->root_group;
 
        cfqq->cfqg = cfqg;
        /* cfqq reference on cfqg */
-       cfqq->cfqg->ref++;
-}
-
-static void cfq_put_cfqg(struct cfq_group *cfqg)
-{
-       struct cfq_rb_root *st;
-       int i, j;
-
-       BUG_ON(cfqg->ref <= 0);
-       cfqg->ref--;
-       if (cfqg->ref)
-               return;
-       for_each_cfqg_st(cfqg, i, j, st)
-               BUG_ON(!RB_EMPTY_ROOT(&st->rb));
-       free_percpu(cfqg->blkg.stats_cpu);
-       kfree(cfqg);
-}
-
-static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
-       /* Something wrong if we are trying to remove same group twice */
-       BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
-
-       hlist_del_init(&cfqg->cfqd_node);
-
-       BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
-       cfqd->nr_blkcg_linked_grps--;
-
-       /*
-        * Put the reference taken at the time of creation so that when all
-        * queues are gone, group can be destroyed.
-        */
-       cfq_put_cfqg(cfqg);
-}
-
-static void cfq_release_cfq_groups(struct cfq_data *cfqd)
-{
-       struct hlist_node *pos, *n;
-       struct cfq_group *cfqg;
-
-       hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
-               /*
-                * If cgroup removal path got to blk_group first and removed
-                * it from cgroup list, then it will take care of destroying
-                * cfqg also.
-                */
-               if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
-                       cfq_destroy_cfqg(cfqd, cfqg);
-       }
-}
-
-/*
- * Blk cgroup controller notification saying that blkio_group object is being
- * delinked as associated cgroup object is going away. That also means that
- * no new IO will come in this group. So get rid of this group as soon as
- * any pending IO in the group is finished.
- *
- * This function is called under rcu_read_lock(). key is the rcu protected
- * pointer. That means "key" is a valid cfq_data pointer as long as we are rcu
- * read lock.
- *
- * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
- * it should not be NULL as even if elevator was exiting, cgroup deltion
- * path got to it first.
- */
-static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
-{
-       unsigned long  flags;
-       struct cfq_data *cfqd = key;
-
-       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-       cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
-       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+       cfqg_get(cfqg);
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+                                               struct blkio_cgroup *blkcg)
 {
-       return &cfqd->root_group;
-}
-
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
-       return cfqg;
+       return cfqd->root_group;
 }
 
 static inline void
@@ -1281,9 +1123,6 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
        cfqq->cfqg = cfqg;
 }
 
-static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
-static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
-
 #endif /* GROUP_IOSCHED */
 
 /*
@@ -1550,12 +1389,14 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
        elv_rb_del(&cfqq->sort_list, rq);
        cfqq->queued[rq_is_sync(rq)]--;
-       cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
-                                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                          &blkio_policy_cfq, rq_data_dir(rq),
+                                          rq_is_sync(rq));
        cfq_add_rq_rb(rq);
-       cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
-                       &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
-                       rq_is_sync(rq));
+       cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                       &blkio_policy_cfq,
+                                       cfqg_to_blkg(cfqq->cfqd->serving_group),
+                                       rq_data_dir(rq), rq_is_sync(rq));
 }
 
 static struct request *
@@ -1611,8 +1452,9 @@ static void cfq_remove_request(struct request *rq)
        cfq_del_rq_rb(rq);
 
        cfqq->cfqd->rq_queued--;
-       cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
-                                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                          &blkio_policy_cfq, rq_data_dir(rq),
+                                          rq_is_sync(rq));
        if (rq->cmd_flags & REQ_PRIO) {
                WARN_ON(!cfqq->prio_pending);
                cfqq->prio_pending--;
@@ -1647,8 +1489,9 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
 static void cfq_bio_merged(struct request_queue *q, struct request *req,
                                struct bio *bio)
 {
-       cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg,
-                                       bio_data_dir(bio), cfq_bio_sync(bio));
+       cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(req)),
+                                          &blkio_policy_cfq, bio_data_dir(bio),
+                                          cfq_bio_sync(bio));
 }
 
 static void
@@ -1670,8 +1513,9 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
        if (cfqq->next_rq == next)
                cfqq->next_rq = rq;
        cfq_remove_request(next);
-       cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
-                                       rq_data_dir(next), rq_is_sync(next));
+       cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                          &blkio_policy_cfq, rq_data_dir(next),
+                                          rq_is_sync(next));
 
        cfqq = RQ_CFQQ(next);
        /*
@@ -1712,7 +1556,8 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        del_timer(&cfqd->idle_slice_timer);
-       cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
+       cfq_blkiocg_update_idle_time_stats(cfqg_to_blkg(cfqq->cfqg),
+                                          &blkio_policy_cfq);
 }
 
 static void __cfq_set_active_queue(struct cfq_data *cfqd,
@@ -1721,7 +1566,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
        if (cfqq) {
                cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
                                cfqd->serving_prio, cfqd->serving_type);
-               cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
+               cfq_blkiocg_update_avg_queue_size_stats(cfqg_to_blkg(cfqq->cfqg),
+                                                       &blkio_policy_cfq);
                cfqq->slice_start = 0;
                cfqq->dispatch_start = jiffies;
                cfqq->allocated_slice = 0;
@@ -2042,7 +1888,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
         * task has exited, don't wait
         */
        cic = cfqd->active_cic;
-       if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks))
+       if (!cic || !atomic_read(&cic->icq.ioc->active_ref))
                return;
 
        /*
@@ -2069,7 +1915,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
                sl = cfqd->cfq_slice_idle;
 
        mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
-       cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
+       cfq_blkiocg_update_set_idle_time_stats(cfqg_to_blkg(cfqq->cfqg),
+                                              &blkio_policy_cfq);
        cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
                        group_idle ? 1 : 0);
 }
@@ -2092,8 +1939,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
        cfqq->nr_sectors += blk_rq_sectors(rq);
-       cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
-                                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_dispatch_stats(cfqg_to_blkg(cfqq->cfqg),
+                                         &blkio_policy_cfq, blk_rq_bytes(rq),
+                                         rq_data_dir(rq), rq_is_sync(rq));
 }
 
 /*
@@ -2675,7 +2523,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        kmem_cache_free(cfq_pool, cfqq);
-       cfq_put_cfqg(cfqg);
+       cfqg_put(cfqg);
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -2734,7 +2582,7 @@ static void cfq_exit_icq(struct io_cq *icq)
        }
 }
 
-static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
+static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
 {
        struct task_struct *tsk = current;
        int ioprio_class;
@@ -2742,7 +2590,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
        if (!cfq_cfqq_prio_changed(cfqq))
                return;
 
-       ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
+       ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
        switch (ioprio_class) {
        default:
                printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
@@ -2754,11 +2602,11 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
                cfqq->ioprio_class = task_nice_ioclass(tsk);
                break;
        case IOPRIO_CLASS_RT:
-               cfqq->ioprio = task_ioprio(ioc);
+               cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
                cfqq->ioprio_class = IOPRIO_CLASS_RT;
                break;
        case IOPRIO_CLASS_BE:
-               cfqq->ioprio = task_ioprio(ioc);
+               cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
                cfqq->ioprio_class = IOPRIO_CLASS_BE;
                break;
        case IOPRIO_CLASS_IDLE:
@@ -2776,19 +2624,24 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
        cfq_clear_cfqq_prio_changed(cfqq);
 }
 
-static void changed_ioprio(struct cfq_io_cq *cic)
+static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio)
 {
+       int ioprio = cic->icq.ioc->ioprio;
        struct cfq_data *cfqd = cic_to_cfqd(cic);
        struct cfq_queue *cfqq;
 
-       if (unlikely(!cfqd))
+       /*
+        * Check whether ioprio has changed.  The condition may trigger
+        * spuriously on a newly created cic but there's no harm.
+        */
+       if (unlikely(!cfqd) || likely(cic->ioprio == ioprio))
                return;
 
        cfqq = cic->cfqq[BLK_RW_ASYNC];
        if (cfqq) {
                struct cfq_queue *new_cfqq;
-               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc,
-                                               GFP_ATOMIC);
+               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio,
+                                        GFP_ATOMIC);
                if (new_cfqq) {
                        cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
                        cfq_put_queue(cfqq);
@@ -2798,6 +2651,8 @@ static void changed_ioprio(struct cfq_io_cq *cic)
        cfqq = cic->cfqq[BLK_RW_SYNC];
        if (cfqq)
                cfq_mark_cfqq_prio_changed(cfqq);
+
+       cic->ioprio = ioprio;
 }
 
 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
@@ -2821,17 +2676,24 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void changed_cgroup(struct cfq_io_cq *cic)
+static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 {
-       struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
        struct cfq_data *cfqd = cic_to_cfqd(cic);
-       struct request_queue *q;
+       struct cfq_queue *sync_cfqq;
+       uint64_t id;
 
-       if (unlikely(!cfqd))
-               return;
+       rcu_read_lock();
+       id = bio_blkio_cgroup(bio)->id;
+       rcu_read_unlock();
 
-       q = cfqd->queue;
+       /*
+        * Check whether blkcg has changed.  The condition may trigger
+        * spuriously on a newly created cic but there's no harm.
+        */
+       if (unlikely(!cfqd) || likely(cic->blkcg_id == id))
+               return;
 
+       sync_cfqq = cic_to_cfqq(cic, 1);
        if (sync_cfqq) {
                /*
                 * Drop reference to sync queue. A new sync queue will be
@@ -2841,21 +2703,26 @@ static void changed_cgroup(struct cfq_io_cq *cic)
                cic_set_cfqq(cic, NULL, 1);
                cfq_put_queue(sync_cfqq);
        }
+
+       cic->blkcg_id = id;
 }
+#else
+static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { }
 #endif  /* CONFIG_CFQ_GROUP_IOSCHED */
 
 static struct cfq_queue *
-cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
-                    struct io_context *ioc, gfp_t gfp_mask)
+cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
+                    struct bio *bio, gfp_t gfp_mask)
 {
+       struct blkio_cgroup *blkcg;
        struct cfq_queue *cfqq, *new_cfqq = NULL;
-       struct cfq_io_cq *cic;
        struct cfq_group *cfqg;
 
 retry:
-       cfqg = cfq_get_cfqg(cfqd);
-       cic = cfq_cic_lookup(cfqd, ioc);
-       /* cic always exists here */
+       rcu_read_lock();
+
+       blkcg = bio_blkio_cgroup(bio);
+       cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
        cfqq = cic_to_cfqq(cic, is_sync);
 
        /*
@@ -2868,6 +2735,7 @@ retry:
                        cfqq = new_cfqq;
                        new_cfqq = NULL;
                } else if (gfp_mask & __GFP_WAIT) {
+                       rcu_read_unlock();
                        spin_unlock_irq(cfqd->queue->queue_lock);
                        new_cfqq = kmem_cache_alloc_node(cfq_pool,
                                        gfp_mask | __GFP_ZERO,
@@ -2883,7 +2751,7 @@ retry:
 
                if (cfqq) {
                        cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
-                       cfq_init_prio_data(cfqq, ioc);
+                       cfq_init_prio_data(cfqq, cic);
                        cfq_link_cfqq_cfqg(cfqq, cfqg);
                        cfq_log_cfqq(cfqd, cfqq, "alloced");
                } else
@@ -2893,6 +2761,7 @@ retry:
        if (new_cfqq)
                kmem_cache_free(cfq_pool, new_cfqq);
 
+       rcu_read_unlock();
        return cfqq;
 }
 
@@ -2902,6 +2771,9 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
        switch (ioprio_class) {
        case IOPRIO_CLASS_RT:
                return &cfqd->async_cfqq[0][ioprio];
+       case IOPRIO_CLASS_NONE:
+               ioprio = IOPRIO_NORM;
+               /* fall through */
        case IOPRIO_CLASS_BE:
                return &cfqd->async_cfqq[1][ioprio];
        case IOPRIO_CLASS_IDLE:
@@ -2912,11 +2784,11 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
 }
 
 static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
-             gfp_t gfp_mask)
+cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
+             struct bio *bio, gfp_t gfp_mask)
 {
-       const int ioprio = task_ioprio(ioc);
-       const int ioprio_class = task_ioprio_class(ioc);
+       const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
+       const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
        struct cfq_queue **async_cfqq = NULL;
        struct cfq_queue *cfqq = NULL;
 
@@ -2926,7 +2798,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
        }
 
        if (!cfqq)
-               cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
+               cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, bio, gfp_mask);
 
        /*
         * pin the queue now that it's allocated, scheduler exit will prune it
@@ -3008,7 +2880,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
        if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
                enable_idle = 0;
-       else if (!atomic_read(&cic->icq.ioc->nr_tasks) ||
+       else if (!atomic_read(&cic->icq.ioc->active_ref) ||
                 !cfqd->cfq_slice_idle ||
                 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
                enable_idle = 0;
@@ -3173,7 +3045,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                                __blk_run_queue(cfqd->queue);
                        } else {
                                cfq_blkiocg_update_idle_time_stats(
-                                               &cfqq->cfqg->blkg);
+                                               cfqg_to_blkg(cfqq->cfqg),
+                                               &blkio_policy_cfq);
                                cfq_mark_cfqq_must_dispatch(cfqq);
                        }
                }
@@ -3195,14 +3068,15 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
        struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
        cfq_log_cfqq(cfqd, cfqq, "insert_request");
-       cfq_init_prio_data(cfqq, RQ_CIC(rq)->icq.ioc);
+       cfq_init_prio_data(cfqq, RQ_CIC(rq));
 
        rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
        list_add_tail(&rq->queuelist, &cfqq->fifo);
        cfq_add_rq_rb(rq);
-       cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
-                       &cfqd->serving_group->blkg, rq_data_dir(rq),
-                       rq_is_sync(rq));
+       cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+                                       &blkio_policy_cfq,
+                                       cfqg_to_blkg(cfqd->serving_group),
+                                       rq_data_dir(rq), rq_is_sync(rq));
        cfq_rq_enqueued(cfqd, cfqq, rq);
 }
 
@@ -3298,9 +3172,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        cfqd->rq_in_driver--;
        cfqq->dispatched--;
        (RQ_CFQG(rq))->dispatched--;
-       cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg,
-                       rq_start_time_ns(rq), rq_io_start_time_ns(rq),
-                       rq_data_dir(rq), rq_is_sync(rq));
+       cfq_blkiocg_update_completion_stats(cfqg_to_blkg(cfqq->cfqg),
+                       &blkio_policy_cfq, rq_start_time_ns(rq),
+                       rq_io_start_time_ns(rq), rq_data_dir(rq),
+                       rq_is_sync(rq));
 
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
@@ -3397,7 +3272,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 
        cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
        if (cfqq) {
-               cfq_init_prio_data(cfqq, cic->icq.ioc);
+               cfq_init_prio_data(cfqq, cic);
 
                return __cfq_may_queue(cfqq);
        }
@@ -3419,7 +3294,7 @@ static void cfq_put_request(struct request *rq)
                cfqq->allocated[rw]--;
 
                /* Put down rq reference on cfqg */
-               cfq_put_cfqg(RQ_CFQG(rq));
+               cfqg_put(RQ_CFQG(rq));
                rq->elv.priv[0] = NULL;
                rq->elv.priv[1] = NULL;
 
@@ -3463,32 +3338,25 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
  * Allocate cfq data structures associated with this request.
  */
 static int
-cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
+cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
+               gfp_t gfp_mask)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
        const int rw = rq_data_dir(rq);
        const bool is_sync = rq_is_sync(rq);
        struct cfq_queue *cfqq;
-       unsigned int changed;
 
        might_sleep_if(gfp_mask & __GFP_WAIT);
 
        spin_lock_irq(q->queue_lock);
 
-       /* handle changed notifications */
-       changed = icq_get_changed(&cic->icq);
-       if (unlikely(changed & ICQ_IOPRIO_CHANGED))
-               changed_ioprio(cic);
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-       if (unlikely(changed & ICQ_CGROUP_CHANGED))
-               changed_cgroup(cic);
-#endif
-
+       check_ioprio_changed(cic, bio);
+       check_blkcg_changed(cic, bio);
 new_queue:
        cfqq = cic_to_cfqq(cic, is_sync);
        if (!cfqq || cfqq == &cfqd->oom_cfqq) {
-               cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask);
+               cfqq = cfq_get_queue(cfqd, is_sync, cic, bio, gfp_mask);
                cic_set_cfqq(cic, cfqq, is_sync);
        } else {
                /*
@@ -3514,8 +3382,9 @@ new_queue:
        cfqq->allocated[rw]++;
 
        cfqq->ref++;
+       cfqg_get(cfqq->cfqg);
        rq->elv.priv[0] = cfqq;
-       rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
+       rq->elv.priv[1] = cfqq->cfqg;
        spin_unlock_irq(q->queue_lock);
        return 0;
 }
@@ -3612,7 +3481,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 {
        struct cfq_data *cfqd = e->elevator_data;
        struct request_queue *q = cfqd->queue;
-       bool wait = false;
 
        cfq_shutdown_timer_wq(cfqd);
 
@@ -3622,89 +3490,58 @@ static void cfq_exit_queue(struct elevator_queue *e)
                __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
        cfq_put_async_queues(cfqd);
-       cfq_release_cfq_groups(cfqd);
-
-       /*
-        * If there are groups which we could not unlink from blkcg list,
-        * wait for a rcu period for them to be freed.
-        */
-       if (cfqd->nr_blkcg_linked_grps)
-               wait = true;
 
        spin_unlock_irq(q->queue_lock);
 
        cfq_shutdown_timer_wq(cfqd);
 
-       /*
-        * Wait for cfqg->blkg->key accessors to exit their grace periods.
-        * Do this wait only if there are other unlinked groups out
-        * there. This can happen if cgroup deletion path claimed the
-        * responsibility of cleaning up a group before queue cleanup code
-        * get to the group.
-        *
-        * Do not call synchronize_rcu() unconditionally as there are drivers
-        * which create/delete request queue hundreds of times during scan/boot
-        * and synchronize_rcu() can take significant time and slow down boot.
-        */
-       if (wait)
-               synchronize_rcu();
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-       /* Free up per cpu stats for root group */
-       free_percpu(cfqd->root_group.blkg.stats_cpu);
+#ifndef CONFIG_CFQ_GROUP_IOSCHED
+       kfree(cfqd->root_group);
 #endif
+       update_root_blkg_pd(q, BLKIO_POLICY_PROP);
        kfree(cfqd);
 }
 
-static void *cfq_init_queue(struct request_queue *q)
+static int cfq_init_queue(struct request_queue *q)
 {
        struct cfq_data *cfqd;
-       int i, j;
-       struct cfq_group *cfqg;
-       struct cfq_rb_root *st;
+       struct blkio_group *blkg __maybe_unused;
+       int i;
 
        cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
        if (!cfqd)
-               return NULL;
+               return -ENOMEM;
+
+       cfqd->queue = q;
+       q->elevator->elevator_data = cfqd;
 
        /* Init root service tree */
        cfqd->grp_service_tree = CFQ_RB_ROOT;
 
-       /* Init root group */
-       cfqg = &cfqd->root_group;
-       for_each_cfqg_st(cfqg, i, j, st)
-               *st = CFQ_RB_ROOT;
-       RB_CLEAR_NODE(&cfqg->rb_node);
-
-       /* Give preference to root group over other groups */
-       cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
-
+       /* Init root group and prefer root group over other groups by default */
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-       /*
-        * Set root group reference to 2. One reference will be dropped when
-        * all groups on cfqd->cfqg_list are being deleted during queue exit.
-        * Other reference will remain there as we don't want to delete this
-        * group as it is statically allocated and gets destroyed when
-        * throtl_data goes away.
-        */
-       cfqg->ref = 2;
+       rcu_read_lock();
+       spin_lock_irq(q->queue_lock);
 
-       if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
-               kfree(cfqg);
+       blkg = blkg_lookup_create(&blkio_root_cgroup, q, true);
+       if (!IS_ERR(blkg))
+               cfqd->root_group = blkg_to_cfqg(blkg);
+
+       spin_unlock_irq(q->queue_lock);
+       rcu_read_unlock();
+#else
+       cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group),
+                                       GFP_KERNEL, cfqd->queue->node);
+       if (cfqd->root_group)
+               cfq_init_cfqg_base(cfqd->root_group);
+#endif
+       if (!cfqd->root_group) {
                kfree(cfqd);
-               return NULL;
+               return -ENOMEM;
        }
 
-       rcu_read_lock();
-
-       cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
-                                       (void *)cfqd, 0);
-       rcu_read_unlock();
-       cfqd->nr_blkcg_linked_grps++;
+       cfqd->root_group->weight = 2*BLKIO_WEIGHT_DEFAULT;
 
-       /* Add group on cfqd->cfqg_list */
-       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-#endif
        /*
         * Not strictly needed (since RB_ROOT just clears the node and we
         * zeroed cfqd on alloc), but better be safe in case someone decides
@@ -3716,13 +3553,17 @@ static void *cfq_init_queue(struct request_queue *q)
        /*
         * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
         * Grab a permanent reference to it, so that the normal code flow
-        * will not attempt to free it.
+        * will not attempt to free it.  oom_cfqq is linked to root_group
+        * but shouldn't hold a reference as it'll never be unlinked.  Lose
+        * the reference from linking right away.
         */
        cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
        cfqd->oom_cfqq.ref++;
-       cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
 
-       cfqd->queue = q;
+       spin_lock_irq(q->queue_lock);
+       cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
+       cfqg_put(cfqd->root_group);
+       spin_unlock_irq(q->queue_lock);
 
        init_timer(&cfqd->idle_slice_timer);
        cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
@@ -3747,7 +3588,7 @@ static void *cfq_init_queue(struct request_queue *q)
         * second, in order to have larger depth for async operations.
         */
        cfqd->last_delayed_sync = jiffies - HZ;
-       return cfqd;
+       return 0;
 }
 
 /*
@@ -3873,13 +3714,12 @@ static struct elevator_type iosched_cfq = {
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 static struct blkio_policy_type blkio_policy_cfq = {
        .ops = {
-               .blkio_unlink_group_fn =        cfq_unlink_blkio_group,
+               .blkio_init_group_fn =          cfq_init_blkio_group,
                .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
        },
        .plid = BLKIO_POLICY_PROP,
+       .pdata_size = sizeof(struct cfq_group),
 };
-#else
-static struct blkio_policy_type blkio_policy_cfq;
 #endif
 
 static int __init cfq_init(void)
@@ -3910,14 +3750,17 @@ static int __init cfq_init(void)
                return ret;
        }
 
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
        blkio_policy_register(&blkio_policy_cfq);
-
+#endif
        return 0;
 }
 
 static void __exit cfq_exit(void)
 {
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
        blkio_policy_unregister(&blkio_policy_cfq);
+#endif
        elv_unregister(&iosched_cfq);
        kmem_cache_destroy(cfq_pool);
 }