4 * Common Block IO controller cgroup interface
6 * Based on ideas and code from CFQ, CFS and BFQ:
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
9 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
10 * Paolo Valente <paolo.valente@unimore.it>
12 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
13 * Nauman Rafique <nauman@google.com>
16 #include <linux/cgroup.h>
17 #include <linux/u64_stats_sync.h>
19 enum blkio_policy_id {
20 BLKIO_POLICY_PROP = 0, /* Proportional Bandwidth division */
21 BLKIO_POLICY_THROTL, /* Throttling */
26 /* Max limits for throttle policy */
27 #define THROTL_IOPS_MAX UINT_MAX
29 #ifdef CONFIG_BLK_CGROUP
/* cft->private [un]packing for stat printing */
/*
 * Pack a policy id and a stat-field offset into one cft->private value:
 * policy id in the high 16 bits, offset in the low 16 bits.  The offset
 * must therefore fit in 16 bits.
 */
#define BLKCG_STAT_PRIV(pol, off)	(((unsigned)(pol) << 16) | (off))
/* recover the policy id from a packed cft->private value */
#define BLKCG_STAT_POL(prv)		((unsigned)(prv) >> 16)
/* recover the stat-field offset from a packed cft->private value */
#define BLKCG_STAT_OFF(prv)		((unsigned)(prv) & 0xffff)
36 enum blkg_rwstat_type {
43 BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
46 /* blkg state flags */
47 enum blkg_state_flags {
53 /* cgroup files owned by proportional weight policy */
54 enum blkcg_file_name_prop {
55 BLKIO_PROP_weight = 1,
56 BLKIO_PROP_weight_device,
59 /* cgroup files owned by throttle policy */
60 enum blkcg_file_name_throtl {
61 BLKIO_THROTL_read_bps_device,
62 BLKIO_THROTL_write_bps_device,
63 BLKIO_THROTL_read_iops_device,
64 BLKIO_THROTL_write_iops_device,
68 struct cgroup_subsys_state css;
71 struct hlist_head blkg_list;
73 /* for policies to test whether associated blkcg has changed */
78 struct u64_stats_sync syncp;
83 struct u64_stats_sync syncp;
84 uint64_t cnt[BLKG_RWSTAT_NR];
87 struct blkio_group_stats {
88 /* number of ios merged */
89 struct blkg_rwstat merged;
90 /* total time spent on device in ns, may not be accurate w/ queueing */
91 struct blkg_rwstat service_time;
92 /* total time spent waiting in scheduler queue in ns */
93 struct blkg_rwstat wait_time;
94 /* number of IOs queued up */
95 struct blkg_rwstat queued;
96 /* total disk time and nr sectors dispatched by this group */
97 struct blkg_stat time;
98 #ifdef CONFIG_DEBUG_BLK_CGROUP
99 /* time not charged to this cgroup */
100 struct blkg_stat unaccounted_time;
101 /* sum of number of ios queued across all samples */
102 struct blkg_stat avg_queue_size_sum;
103 /* count of samples taken for average */
104 struct blkg_stat avg_queue_size_samples;
105 /* how many times this group has been removed from service tree */
106 struct blkg_stat dequeue;
107 /* total time spent waiting for it to be assigned a timeslice. */
108 struct blkg_stat group_wait_time;
109 /* time spent idling for this blkio_group */
110 struct blkg_stat idle_time;
111 /* total time with empty current active q with other requests queued */
112 struct blkg_stat empty_time;
113 /* fields after this shouldn't be cleared on stat reset */
114 uint64_t start_group_wait_time;
115 uint64_t start_idle_time;
116 uint64_t start_empty_time;
121 /* Per cpu blkio group stats */
122 struct blkio_group_stats_cpu {
123 /* total bytes transferred */
124 struct blkg_rwstat service_bytes;
125 /* total IOs serviced, post merge */
126 struct blkg_rwstat serviced;
127 /* total sectors transferred */
128 struct blkg_stat sectors;
131 struct blkio_group_conf {
133 unsigned int iops[2];
137 /* per-blkg per-policy data */
138 struct blkg_policy_data {
139 /* the blkg this per-policy data belongs to */
140 struct blkio_group *blkg;
143 struct blkio_group_conf conf;
145 struct blkio_group_stats stats;
146 /* Per cpu stats pointer */
147 struct blkio_group_stats_cpu __percpu *stats_cpu;
149 /* pol->pdata_size bytes of private data used by policy impl */
150 char pdata[] __aligned(__alignof__(unsigned long long));
154 /* Pointer to the associated request_queue */
155 struct request_queue *q;
156 struct list_head q_node;
157 struct hlist_node blkcg_node;
158 struct blkio_cgroup *blkcg;
159 /* Store cgroup path */
161 /* reference count */
164 struct blkg_policy_data *pd[BLKIO_NR_POLICIES];
166 /* List of blkg waiting for per cpu stats memory to be allocated */
167 struct list_head alloc_node;
168 struct rcu_head rcu_head;
/*
 * Callback types a blkio policy implementation provides via
 * struct blkio_policy_ops to be notified of per-group changes.
 */
/* called when a new blkg is created so the policy can set up its data */
typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
/* proportional weight of @blkg on @q changed */
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int weight);
/* throttle read bytes-per-second limit of @blkg changed */
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 read_bps);
/* throttle write bytes-per-second limit of @blkg changed */
typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 write_bps);
/* throttle read IOs-per-second limit of @blkg changed */
typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int read_iops);
/* throttle write IOs-per-second limit of @blkg changed */
typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int write_iops);
183 struct blkio_policy_ops {
184 blkio_init_group_fn *blkio_init_group_fn;
185 blkio_update_group_weight_fn *blkio_update_group_weight_fn;
186 blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
187 blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
188 blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
189 blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
192 struct blkio_policy_type {
193 struct list_head list;
194 struct blkio_policy_ops ops;
195 enum blkio_policy_id plid;
196 size_t pdata_size; /* policy specific private data size */
/* queue lifetime hooks invoked by the block core */
extern int blkcg_init_queue(struct request_queue *q);
extern void blkcg_drain_queue(struct request_queue *q);
extern void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
/* tear down blkgs on @q; @destroy_root selects whether the root blkg goes too */
extern void blkg_destroy_all(struct request_queue *q, bool destroy_root);
/* refresh the root blkg's policy data for @plid on @q */
extern void update_root_blkg_pd(struct request_queue *q,
				enum blkio_policy_id plid);
211 * blkg_to_pdata - get policy private data
212 * @blkg: blkg of interest
213 * @pol: policy of interest
215 * Return pointer to private data associated with the @blkg-@pol pair.
217 static inline void *blkg_to_pdata(struct blkio_group *blkg,
218 struct blkio_policy_type *pol)
220 return blkg ? blkg->pd[pol->plid]->pdata : NULL;
224 * pdata_to_blkg - get blkg associated with policy private data
225 * @pdata: policy private data of interest
227 * @pdata is policy private data. Determine the blkg it's associated with.
229 static inline struct blkio_group *pdata_to_blkg(void *pdata)
232 struct blkg_policy_data *pd =
233 container_of(pdata, struct blkg_policy_data, pdata);
239 static inline char *blkg_path(struct blkio_group *blkg)
245 * blkg_get - get a blkg reference
248 * The caller should be holding queue_lock and an existing reference.
250 static inline void blkg_get(struct blkio_group *blkg)
252 lockdep_assert_held(blkg->q->queue_lock);
253 WARN_ON_ONCE(!blkg->refcnt);
257 void __blkg_release(struct blkio_group *blkg);
260 * blkg_put - put a blkg reference
263 * The caller should be holding queue_lock.
265 static inline void blkg_put(struct blkio_group *blkg)
267 lockdep_assert_held(blkg->q->queue_lock);
268 WARN_ON_ONCE(blkg->refcnt <= 0);
270 __blkg_release(blkg);
274 * blkg_stat_add - add a value to a blkg_stat
275 * @stat: target blkg_stat
278 * Add @val to @stat. The caller is responsible for synchronizing calls to
281 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
283 u64_stats_update_begin(&stat->syncp);
285 u64_stats_update_end(&stat->syncp);
289 * blkg_stat_read - read the current value of a blkg_stat
290 * @stat: blkg_stat to read
292 * Read the current value of @stat. This function can be called without
293 * synchroniztion and takes care of u64 atomicity.
295 static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
301 start = u64_stats_fetch_begin(&stat->syncp);
303 } while (u64_stats_fetch_retry(&stat->syncp, start));
309 * blkg_stat_reset - reset a blkg_stat
310 * @stat: blkg_stat to reset
312 static inline void blkg_stat_reset(struct blkg_stat *stat)
318 * blkg_rwstat_add - add a value to a blkg_rwstat
319 * @rwstat: target blkg_rwstat
320 * @rw: mask of REQ_{WRITE|SYNC}
323 * Add @val to @rwstat. The counters are chosen according to @rw. The
324 * caller is responsible for synchronizing calls to this function.
326 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
327 int rw, uint64_t val)
329 u64_stats_update_begin(&rwstat->syncp);
332 rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
334 rwstat->cnt[BLKG_RWSTAT_READ] += val;
336 rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
338 rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
340 u64_stats_update_end(&rwstat->syncp);
344 * blkg_rwstat_read - read the current values of a blkg_rwstat
345 * @rwstat: blkg_rwstat to read
347 * Read the current snapshot of @rwstat and return it as the return value.
348 * This function can be called without synchronization and takes care of
351 static struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
354 struct blkg_rwstat tmp;
357 start = u64_stats_fetch_begin(&rwstat->syncp);
359 } while (u64_stats_fetch_retry(&rwstat->syncp, start));
365 * blkg_rwstat_sum - read the total count of a blkg_rwstat
366 * @rwstat: blkg_rwstat to read
368 * Return the total count of @rwstat regardless of the IO direction. This
369 * function can be called without synchronization and takes care of u64
372 static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
374 struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
376 return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
380 * blkg_rwstat_reset - reset a blkg_rwstat
381 * @rwstat: blkg_rwstat to reset
383 static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
385 memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
393 struct blkio_policy_type {
/*
 * !CONFIG_BLK_CGROUP stubs: the block core calls these unconditionally
 * and they compile away to nothing.
 */
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
/*
 * !CONFIG_BLK_CGROUP stub: no blkgs exist, nothing to destroy.
 * Parameter name fixed from "destory_root" to match the real
 * declaration's "destroy_root".
 */
static inline void blkg_destroy_all(struct request_queue *q,
				    bool destroy_root) { }
/* no-op: there is no root blkg policy data without CONFIG_BLK_CGROUP */
static inline void update_root_blkg_pd(struct request_queue *q,
				enum blkio_policy_id plid) { }
/* no blkgs exist, so per-policy data can never be found */
static inline void *blkg_to_pdata(struct blkio_group *blkg,
				struct blkio_policy_type *pol) { return NULL; }
/*
 * !CONFIG_BLK_CGROUP stub.  The real inline takes only @pdata (the blkg
 * is recovered via container_of), so the stub must not take an extra
 * policy argument -- mismatched prototypes would break callers built
 * without blk-cgroup support.
 */
static inline struct blkio_group *pdata_to_blkg(void *pdata) { return NULL; }
/* path lookup and refcounting are meaningless without blkgs */
static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
static inline void blkg_get(struct blkio_group *blkg) { }
static inline void blkg_put(struct blkio_group *blkg) { }
/* valid range and default for the proportional-weight knob */
#define BLKIO_WEIGHT_MIN	10
#define BLKIO_WEIGHT_MAX	1000
#define BLKIO_WEIGHT_DEFAULT	500
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* sample the current queue depth into the avg_queue_size debug stats */
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
				struct blkio_policy_type *pol);
/* record a removal of @blkg from the service tree (@dequeue count) */
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				struct blkio_policy_type *pol,
				unsigned long dequeue);
/* mark the start of an idling period for @blkg */
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
				struct blkio_policy_type *pol);
/* account the idle time accumulated for @blkg */
void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				struct blkio_policy_type *pol);
/* note the timestamp at which @blkg's queues went empty */
void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				struct blkio_policy_type *pol);
433 #define BLKG_FLAG_FNS(name) \
434 static inline void blkio_mark_blkg_##name( \
435 struct blkio_group_stats *stats) \
437 stats->flags |= (1 << BLKG_##name); \
439 static inline void blkio_clear_blkg_##name( \
440 struct blkio_group_stats *stats) \
442 stats->flags &= ~(1 << BLKG_##name); \
444 static inline int blkio_blkg_##name(struct blkio_group_stats *stats) \
446 return (stats->flags & (1 << BLKG_##name)) != 0; \
/* instantiate the mark/clear/test flag helpers for waiting and idling */
BLKG_FLAG_FNS(waiting)
BLKG_FLAG_FNS(idling)
/* CONFIG_DEBUG_BLK_CGROUP disabled: debug statistics compile away */
static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, unsigned long dequeue) { }
static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
#ifdef CONFIG_BLK_CGROUP
/* the blkcg of the root cgroup; always present */
extern struct blkio_cgroup blkio_root_cgroup;
/* map a generic cgroup to its blkio-specific state */
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
/* resolve the blkcg a bio is charged to */
extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio);
/* find the blkg for the @blkcg/@q pair, NULL if none exists */
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				       struct request_queue *q);
472 struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
473 struct request_queue *q,
475 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
476 struct blkio_policy_type *pol,
478 unsigned long unaccounted_time);
/* account @bytes dispatched for @blkg/@pol, split by direction and sync flag */
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync);
482 void blkiocg_update_completion_stats(struct blkio_group *blkg,
483 struct blkio_policy_type *pol,
485 uint64_t io_start_time, bool direction,
/* account a request merge against @blkg/@pol */
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
490 void blkiocg_update_io_add_stats(struct blkio_group *blkg,
491 struct blkio_policy_type *pol,
492 struct blkio_group *curr_blkg, bool direction,
/* account the removal of a queued IO from @blkg/@pol */
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
/* !CONFIG_BLK_CGROUP: there is no blkcg hierarchy to resolve */
static inline struct blkio_cgroup *
cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
static inline struct blkio_cgroup *
bio_blkio_cgroup(struct bio *bio) { return NULL; }
/*
 * !CONFIG_BLK_CGROUP stub.  Signature fixed to match the real
 * declaration, which takes the request_queue rather than an opaque
 * "void *key" -- the mismatch would break callers built without
 * blk-cgroup support.
 */
static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
					      struct request_queue *q)
{
	return NULL;
}
/* stat-update stubs: no statistics are collected without CONFIG_BLK_CGROUP */
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
			struct blkio_policy_type *pol, unsigned long time,
			unsigned long unaccounted_time) { }
static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, uint64_t bytes,
			bool direction, bool sync) { }
static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, uint64_t start_time,
			uint64_t io_start_time, bool direction, bool sync) { }
515 static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
516 struct blkio_policy_type *pol, bool direction,
518 static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
519 struct blkio_policy_type *pol,
520 struct blkio_group *curr_blkg, bool direction,
522 static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
523 struct blkio_policy_type *pol, bool direction,
526 #endif /* _BLK_CGROUP_H */