Merge branch 'for-3.13-fixes' into for-3.14

author Tejun Heo <tj@kernel.org>

Fri, 6 Dec 2013 20:09:27 +0000 (15:09 -0500)

committer Tejun Heo <tj@kernel.org>

Fri, 6 Dec 2013 20:09:27 +0000 (15:09 -0500)
author Tejun Heo <tj@kernel.org>
Fri, 6 Dec 2013 20:09:27 +0000 (15:09 -0500)
committer Tejun Heo <tj@kernel.org>
Fri, 6 Dec 2013 20:09:27 +0000 (15:09 -0500)
diff --combined kernel/cgroup.c

index f9ae38a,bcb1755..2e5fbf9
--- 1/kernel/cgroup.c
--- 2/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@@ -41,6 -41,7 +41,6 @@@
   #include <linux/rcupdate.h>
   #include <linux/sched.h>
   #include <linux/backing-dev.h>
- -#include <linux/seq_file.h>
   #include <linux/slab.h>
   #include <linux/magic.h>
   #include <linux/spinlock.h>
@@@ -55,20 -56,15 +55,20 @@@
   #include <linux/pid_namespace.h>
   #include <linux/idr.h>
   #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
- -#include <linux/eventfd.h>
- -#include <linux/poll.h>
   #include <linux/flex_array.h> /* used in cgroup_attach_task */
   #include <linux/kthread.h>
- -#include <linux/file.h>
   
   #include <linux/atomic.h>
   
   /*
+ + * pidlists linger the following amount before being destroyed.  The goal
+ + * is avoiding frequent destruction in the middle of consecutive read calls.
+ + * Expiring in the middle is a performance problem not a correctness one.
+ + * 1 sec should be enough.
+ + */
+ +#define CGROUP_PIDLIST_DESTROY_DELAY  HZ
+ +
+ +/*
    * cgroup_mutex is the master lock.  Any modification to cgroup or its
    * hierarchy must be performed while holding it.
    *
@@@ -102,12 -98,6 +102,12 @@@ static DEFINE_MUTEX(cgroup_root_mutex)
   static struct workqueue_struct *cgroup_destroy_wq;
   
   /*
+ + * pidlist destructions need to be flushed on cgroup destruction.  Use a
+ + * separate workqueue as flush domain.
+ + */
+ +static struct workqueue_struct *cgroup_pidlist_destroy_wq;
+ +
+ +/*
    * Generate an array of cgroup subsystem pointers. At boot time, this is
    * populated with the built in subsystems, and modular subsystems are
    * registered after that. The mutable section of this array is protected by
@@@ -129,6 -119,49 +129,6 @@@ static struct cgroupfs_root cgroup_dumm
   /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */
   static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup;
   
- -/*
- - * cgroupfs file entry, pointed to from leaf dentry->d_fsdata.
- - */
- -struct cfent {
- -      struct list_head                node;
- -      struct dentry                   *dentry;
- -      struct cftype                   *type;
- -      struct cgroup_subsys_state      *css;
- -
- -      /* file xattrs */
- -      struct simple_xattrs            xattrs;
- -};
- -
- -/*
- - * cgroup_event represents events which userspace want to receive.
- - */
- -struct cgroup_event {
- -      /*
- -       * css which the event belongs to.
- -       */
- -      struct cgroup_subsys_state *css;
- -      /*
- -       * Control file which the event associated.
- -       */
- -      struct cftype *cft;
- -      /*
- -       * eventfd to signal userspace about the event.
- -       */
- -      struct eventfd_ctx *eventfd;
- -      /*
- -       * Each of these stored in a list by the cgroup.
- -       */
- -      struct list_head list;
- -      /*
- -       * All fields below needed to unregister event when
- -       * userspace closes eventfd.
- -       */
- -      poll_table pt;
- -      wait_queue_head_t *wqh;
- -      wait_queue_t wait;
- -      struct work_struct remove;
- -};
- -
   /* The list of hierarchy roots */
   
   static LIST_HEAD(cgroup_roots);
@@@ -167,7 -200,6 +167,7 @@@ static int cgroup_destroy_locked(struc
   static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
                               bool is_add);
   static int cgroup_file_release(struct inode *inode, struct file *file);
+ +static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
   
   /**
    * cgroup_css - obtain a cgroup's css for the specified subsystem
@@@ -831,7 -863,11 +831,7 @@@ static void cgroup_free_fn(struct work_
          */
         deactivate_super(cgrp->root->sb);
   
- -      /*
- -       * if we're getting rid of the cgroup, refcount should ensure
- -       * that there are no pidlists left.
- -       */
- -      BUG_ON(!list_empty(&cgrp->pidlists));
+ +      cgroup_pidlist_destroy_all(cgrp);
   
         simple_xattrs_free(&cgrp->xattrs);
   
@@@ -1316,6 -1352,8 +1316,6 @@@ static void init_cgroup_housekeeping(st
         INIT_LIST_HEAD(&cgrp->pidlists);
         mutex_init(&cgrp->pidlist_mutex);
         cgrp->dummy_css.cgroup = cgrp;
- -      INIT_LIST_HEAD(&cgrp->event_list);
- -      spin_lock_init(&cgrp->event_list_lock);
         simple_xattrs_init(&cgrp->xattrs);
   }
   
@@@ -2212,9 -2250,10 +2212,9 @@@ static int cgroup_release_agent_write(s
         return 0;
   }
   
- -static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
- -                                   struct cftype *cft, struct seq_file *seq)
+ +static int cgroup_release_agent_show(struct seq_file *seq, void *v)
   {
- -      struct cgroup *cgrp = css->cgroup;
+ +      struct cgroup *cgrp = seq_css(seq)->cgroup;
   
         if (!cgroup_lock_live_group(cgrp))
                 return -ENODEV;
@@@ -2224,129 -2263,174 +2224,129 @@@
         return 0;
   }
   
- -static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
- -                                   struct cftype *cft, struct seq_file *seq)
+ +static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
   {
- -      seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
+ +      struct cgroup *cgrp = seq_css(seq)->cgroup;
+ +
+ +      seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
         return 0;
   }
   
   /* A buffer size big enough for numbers or short strings */
   #define CGROUP_LOCAL_BUFFER_SIZE 64
   
- -static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
- -                              struct cftype *cft, struct file *file,
- -                              const char __user *userbuf, size_t nbytes,
- -                              loff_t *unused_ppos)
+ +static ssize_t cgroup_file_write(struct file *file, const char __user *userbuf,
+ +                               size_t nbytes, loff_t *ppos)
   {
- -      char buffer[CGROUP_LOCAL_BUFFER_SIZE];
- -      int retval = 0;
- -      char *end;
+ +      struct cfent *cfe = __d_cfe(file->f_dentry);
+ +      struct cftype *cft = __d_cft(file->f_dentry);
+ +      struct cgroup_subsys_state *css = cfe->css;
+ +      size_t max_bytes = cft->max_write_len ?: CGROUP_LOCAL_BUFFER_SIZE - 1;
+ +      char *buf;
+ +      int ret;
   
- -      if (!nbytes)
- -              return -EINVAL;
- -      if (nbytes >= sizeof(buffer))
+ +      if (nbytes >= max_bytes)
                 return -E2BIG;
- -      if (copy_from_user(buffer, userbuf, nbytes))
- -              return -EFAULT;
   
- -      buffer[nbytes] = 0;     /* nul-terminate */
- -      if (cft->write_u64) {
- -              u64 val = simple_strtoull(strstrip(buffer), &end, 0);
- -              if (*end)
- -                      return -EINVAL;
- -              retval = cft->write_u64(css, cft, val);
+ +      buf = kmalloc(nbytes + 1, GFP_KERNEL);
+ +      if (!buf)
+ +              return -ENOMEM;
+ +
+ +      if (copy_from_user(buf, userbuf, nbytes)) {
+ +              ret = -EFAULT;
+ +              goto out_free;
+ +      }
+ +
+ +      buf[nbytes] = '\0';
+ +
+ +      if (cft->write_string) {
+ +              ret = cft->write_string(css, cft, strstrip(buf));
+ +      } else if (cft->write_u64) {
+ +              unsigned long long v;
+ +              ret = kstrtoull(buf, 0, &v);
+ +              if (!ret)
+ +                      ret = cft->write_u64(css, cft, v);
+ +      } else if (cft->write_s64) {
+ +              long long v;
+ +              ret = kstrtoll(buf, 0, &v);
+ +              if (!ret)
+ +                      ret = cft->write_s64(css, cft, v);
+ +      } else if (cft->trigger) {
+ +              ret = cft->trigger(css, (unsigned int)cft->private);
         } else {
- -              s64 val = simple_strtoll(strstrip(buffer), &end, 0);
- -              if (*end)
- -                      return -EINVAL;
- -              retval = cft->write_s64(css, cft, val);
+ +              ret = -EINVAL;
         }
- -      if (!retval)
- -              retval = nbytes;
- -      return retval;
+ +out_free:
+ +      kfree(buf);
+ +      return ret ?: nbytes;
   }
   
- -static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
- -                                 struct cftype *cft, struct file *file,
- -                                 const char __user *userbuf, size_t nbytes,
- -                                 loff_t *unused_ppos)
+ +/*
+ + * seqfile ops/methods for returning structured data. Currently just
+ + * supports string->u64 maps, but can be extended in future.
+ + */
+ +
+ +static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
   {
- -      char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
- -      int retval = 0;
- -      size_t max_bytes = cft->max_write_len;
- -      char *buffer = local_buffer;
+ +      struct cftype *cft = seq_cft(seq);
   
- -      if (!max_bytes)
- -              max_bytes = sizeof(local_buffer) - 1;
- -      if (nbytes >= max_bytes)
- -              return -E2BIG;
- -      /* Allocate a dynamic buffer if we need one */
- -      if (nbytes >= sizeof(local_buffer)) {
- -              buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- -              if (buffer == NULL)
- -                      return -ENOMEM;
- -      }
- -      if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
- -              retval = -EFAULT;
- -              goto out;
+ +      if (cft->seq_start) {
+ +              return cft->seq_start(seq, ppos);
+ +      } else {
+ +              /*
+ +               * The same behavior and code as single_open().  Returns
+ +               * !NULL if pos is at the beginning; otherwise, NULL.
+ +               */
+ +              return NULL + !*ppos;
         }
- -
- -      buffer[nbytes] = 0;     /* nul-terminate */
- -      retval = cft->write_string(css, cft, strstrip(buffer));
- -      if (!retval)
- -              retval = nbytes;
- -out:
- -      if (buffer != local_buffer)
- -              kfree(buffer);
- -      return retval;
   }
   
- -static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
- -                               size_t nbytes, loff_t *ppos)
+ +static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
   {
- -      struct cfent *cfe = __d_cfe(file->f_dentry);
- -      struct cftype *cft = __d_cft(file->f_dentry);
- -      struct cgroup_subsys_state *css = cfe->css;
+ +      struct cftype *cft = seq_cft(seq);
   
- -      if (cft->write)
- -              return cft->write(css, cft, file, buf, nbytes, ppos);
- -      if (cft->write_u64 || cft->write_s64)
- -              return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
- -      if (cft->write_string)
- -              return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
- -      if (cft->trigger) {
- -              int ret = cft->trigger(css, (unsigned int)cft->private);
- -              return ret ? ret : nbytes;
+ +      if (cft->seq_next) {
+ +              return cft->seq_next(seq, v, ppos);
+ +      } else {
+ +              /*
+ +               * The same behavior and code as single_open(), always
+ +               * terminate after the initial read.
+ +               */
+ +              ++*ppos;
+ +              return NULL;
         }
- -      return -EINVAL;
   }
   
- -static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
- -                             struct cftype *cft, struct file *file,
- -                             char __user *buf, size_t nbytes, loff_t *ppos)
+ +static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
   {
- -      char tmp[CGROUP_LOCAL_BUFFER_SIZE];
- -      u64 val = cft->read_u64(css, cft);
- -      int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
+ +      struct cftype *cft = seq_cft(seq);
   
- -      return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+ +      if (cft->seq_stop)
+ +              cft->seq_stop(seq, v);
   }
   
- -static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
- -                             struct cftype *cft, struct file *file,
- -                             char __user *buf, size_t nbytes, loff_t *ppos)
+ +static int cgroup_seqfile_show(struct seq_file *m, void *arg)
   {
- -      char tmp[CGROUP_LOCAL_BUFFER_SIZE];
- -      s64 val = cft->read_s64(css, cft);
- -      int len = sprintf(tmp, "%lld\n", (long long) val);
- -
- -      return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
- -}
+ +      struct cftype *cft = seq_cft(m);
+ +      struct cgroup_subsys_state *css = seq_css(m);
   
- -static ssize_t cgroup_file_read(struct file *file, char __user *buf,
- -                              size_t nbytes, loff_t *ppos)
- -{
- -      struct cfent *cfe = __d_cfe(file->f_dentry);
- -      struct cftype *cft = __d_cft(file->f_dentry);
- -      struct cgroup_subsys_state *css = cfe->css;
+ +      if (cft->seq_show)
+ +              return cft->seq_show(m, arg);
   
- -      if (cft->read)
- -              return cft->read(css, cft, file, buf, nbytes, ppos);
         if (cft->read_u64)
- -              return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
- -      if (cft->read_s64)
- -              return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
- -      return -EINVAL;
- -}
- -
- -/*
- - * seqfile ops/methods for returning structured data. Currently just
- - * supports string->u64 maps, but can be extended in future.
- - */
- -
- -static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
- -{
- -      struct seq_file *sf = cb->state;
- -      return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
- -}
- -
- -static int cgroup_seqfile_show(struct seq_file *m, void *arg)
- -{
- -      struct cfent *cfe = m->private;
- -      struct cftype *cft = cfe->type;
- -      struct cgroup_subsys_state *css = cfe->css;
- -
- -      if (cft->read_map) {
- -              struct cgroup_map_cb cb = {
- -                      .fill = cgroup_map_add,
- -                      .state = m,
- -              };
- -              return cft->read_map(css, cft, &cb);
- -      }
- -      return cft->read_seq_string(css, cft, m);
+ +              seq_printf(m, "%llu\n", cft->read_u64(css, cft));
+ +      else if (cft->read_s64)
+ +              seq_printf(m, "%lld\n", cft->read_s64(css, cft));
+ +      else
+ +              return -EINVAL;
+ +      return 0;
   }
   
- -static const struct file_operations cgroup_seqfile_operations = {
- -      .read = seq_read,
- -      .write = cgroup_file_write,
- -      .llseek = seq_lseek,
- -      .release = cgroup_file_release,
+ +static struct seq_operations cgroup_seq_operations = {
+ +      .start          = cgroup_seqfile_start,
+ +      .next           = cgroup_seqfile_next,
+ +      .stop           = cgroup_seqfile_stop,
+ +      .show           = cgroup_seqfile_show,
   };
   
   static int cgroup_file_open(struct inode *inode, struct file *file)
@@@ -2355,7 -2439,6 +2355,7 @@@
         struct cftype *cft = __d_cft(file->f_dentry);
         struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
         struct cgroup_subsys_state *css;
+ +      struct cgroup_open_file *of;
         int err;
   
         err = generic_file_open(inode, file);
@@@ -2385,26 -2468,32 +2385,26 @@@
         WARN_ON_ONCE(cfe->css && cfe->css != css);
         cfe->css = css;
   
- -      if (cft->read_map || cft->read_seq_string) {
- -              file->f_op = &cgroup_seqfile_operations;
- -              err = single_open(file, cgroup_seqfile_show, cfe);
- -      } else if (cft->open) {
- -              err = cft->open(inode, file);
+ +      of = __seq_open_private(file, &cgroup_seq_operations,
+ +                              sizeof(struct cgroup_open_file));
+ +      if (of) {
+ +              of->cfe = cfe;
+ +              return 0;
         }
   
- -      if (css->ss && err)
+ +      if (css->ss)
                 css_put(css);
- -      return err;
+ +      return -ENOMEM;
   }
   
   static int cgroup_file_release(struct inode *inode, struct file *file)
   {
         struct cfent *cfe = __d_cfe(file->f_dentry);
- -      struct cftype *cft = __d_cft(file->f_dentry);
         struct cgroup_subsys_state *css = cfe->css;
- -      int ret = 0;
   
- -      if (cft->release)
- -              ret = cft->release(inode, file);
         if (css->ss)
                 css_put(css);
- -      if (file->f_op == &cgroup_seqfile_operations)
- -              single_release(inode, file);
- -      return ret;
+ +      return seq_release_private(inode, file);
   }
   
   /*
@@@ -2515,7 -2604,7 +2515,7 @@@ static ssize_t cgroup_listxattr(struct 
   }
   
   static const struct file_operations cgroup_file_operations = {
- -      .read = cgroup_file_read,
+ +      .read = seq_read,
         .write = cgroup_file_write,
         .llseek = generic_file_llseek,
         .open = cgroup_file_open,
@@@ -2540,6 -2629,16 +2540,6 @@@ static const struct inode_operations cg
         .removexattr = cgroup_removexattr,
   };
   
- -/*
- - * Check if a file is a control file
- - */
- -static inline struct cftype *__file_cft(struct file *file)
- -{
- -      if (file_inode(file)->i_fop != &cgroup_file_operations)
- -              return ERR_PTR(-EINVAL);
- -      return __d_cft(file->f_dentry);
- -}
- -
   static int cgroup_create_file(struct dentry *dentry, umode_t mode,
                                 struct super_block *sb)
   {
@@@ -2597,11 -2696,12 +2597,11 @@@ static umode_t cgroup_file_mode(const s
         if (cft->mode)
                 return cft->mode;
   
- -      if (cft->read || cft->read_u64 || cft->read_s64 ||
- -          cft->read_map || cft->read_seq_string)
+ +      if (cft->read_u64 || cft->read_s64 || cft->seq_show)
                 mode |= S_IRUGO;
   
- -      if (cft->write || cft->write_u64 || cft->write_s64 ||
- -          cft->write_string || cft->trigger)
+ +      if (cft->write_u64 || cft->write_s64 || cft->write_string ||
+ +          cft->trigger)
                 mode |= S_IWUSR;
   
         return mode;
@@@ -3394,12 -3494,14 +3394,12 @@@ struct cgroup_pidlist 
         pid_t *list;
         /* how many elements the above list has */
         int length;
- -      /* how many files are using the current array */
- -      int use_count;
         /* each of these stored in a list by its cgroup */
         struct list_head links;
         /* pointer to the cgroup we belong to, for list removal purposes */
         struct cgroup *owner;
- -      /* protects the other fields */
- -      struct rw_semaphore rwsem;
+ +      /* for delayed destruction */
+ +      struct delayed_work destroy_dwork;
   };
   
   /*
@@@ -3415,7 -3517,6 +3415,7 @@@ static void *pidlist_allocate(int count
         else
                 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
   }
+ +
   static void pidlist_free(void *p)
   {
         if (is_vmalloc_addr(p))
@@@ -3425,47 -3526,6 +3425,47 @@@
   }
   
   /*
+ + * Used to destroy all pidlists lingering waiting for destroy timer.  None
+ + * should be left afterwards.
+ + */
+ +static void cgroup_pidlist_destroy_all(struct cgroup *cgrp)
+ +{
+ +      struct cgroup_pidlist *l, *tmp_l;
+ +
+ +      mutex_lock(&cgrp->pidlist_mutex);
+ +      list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
+ +              mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
+ +      mutex_unlock(&cgrp->pidlist_mutex);
+ +
+ +      flush_workqueue(cgroup_pidlist_destroy_wq);
+ +      BUG_ON(!list_empty(&cgrp->pidlists));
+ +}
+ +
+ +static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
+ +{
+ +      struct delayed_work *dwork = to_delayed_work(work);
+ +      struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
+ +                                              destroy_dwork);
+ +      struct cgroup_pidlist *tofree = NULL;
+ +
+ +      mutex_lock(&l->owner->pidlist_mutex);
+ +
+ +      /*
+ +       * Destroy iff we didn't get queued again.  The state won't change
+ +       * as destroy_dwork can only be queued while locked.
+ +       */
+ +      if (!delayed_work_pending(dwork)) {
+ +              list_del(&l->links);
+ +              pidlist_free(l->list);
+ +              put_pid_ns(l->key.ns);
+ +              tofree = l;
+ +      }
+ +
+ +      mutex_unlock(&l->owner->pidlist_mutex);
+ +      kfree(tofree);
+ +}
+ +
+ +/*
    * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
    * Returns the number of unique elements.
    */
@@@ -3495,92 -3555,52 +3495,92 @@@ after
         return dest;
   }
   
+ +/*
+ + * The two pid files - tasks and cgroup.procs - guaranteed that the result
+ + * is sorted, which forced this whole pidlist fiasco.  As pid order is
+ + * different per namespace, each namespace needs differently sorted list,
+ + * making it impossible to use, for example, single rbtree of member tasks
+ + * sorted by task pointer.  As pidlists can be fairly large, allocating one
+ + * per open file is dangerous, so cgroup had to implement shared pool of
+ + * pidlists keyed by cgroup and namespace.
+ + *
+ + * All this extra complexity was caused by the original implementation
+ + * committing to an entirely unnecessary property.  In the long term, we
+ + * want to do away with it.  Explicitly scramble sort order if
+ + * sane_behavior so that no such expectation exists in the new interface.
+ + *
+ + * Scrambling is done by swapping every two consecutive bits, which is
+ + * non-identity one-to-one mapping which disturbs sort order sufficiently.
+ + */
+ +static pid_t pid_fry(pid_t pid)
+ +{
+ +      unsigned a = pid & 0x55555555;
+ +      unsigned b = pid & 0xAAAAAAAA;
+ +
+ +      return (a << 1) | (b >> 1);
+ +}
+ +
+ +static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
+ +{
+ +      if (cgroup_sane_behavior(cgrp))
+ +              return pid_fry(pid);
+ +      else
+ +              return pid;
+ +}
+ +
   static int cmppid(const void *a, const void *b)
   {
         return *(pid_t *)a - *(pid_t *)b;
   }
   
+ +static int fried_cmppid(const void *a, const void *b)
+ +{
+ +      return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b);
+ +}
+ +
+ +static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
+ +                                                enum cgroup_filetype type)
+ +{
+ +      struct cgroup_pidlist *l;
+ +      /* don't need task_nsproxy() if we're looking at ourself */
+ +      struct pid_namespace *ns = task_active_pid_ns(current);
+ +
+ +      lockdep_assert_held(&cgrp->pidlist_mutex);
+ +
+ +      list_for_each_entry(l, &cgrp->pidlists, links)
+ +              if (l->key.type == type && l->key.ns == ns)
+ +                      return l;
+ +      return NULL;
+ +}
+ +
   /*
    * find the appropriate pidlist for our purpose (given procs vs tasks)
    * returns with the lock on that pidlist already held, and takes care
    * of the use count, or returns NULL with no locks held if we're out of
    * memory.
    */
- -static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
- -                                                enum cgroup_filetype type)
+ +static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
+ +                                              enum cgroup_filetype type)
   {
         struct cgroup_pidlist *l;
- -      /* don't need task_nsproxy() if we're looking at ourself */
- -      struct pid_namespace *ns = task_active_pid_ns(current);
   
- -      /*
- -       * We can't drop the pidlist_mutex before taking the l->rwsem in case
- -       * the last ref-holder is trying to remove l from the list at the same
- -       * time. Holding the pidlist_mutex precludes somebody taking whichever
- -       * list we find out from under us - compare release_pid_array().
- -       */
- -      mutex_lock(&cgrp->pidlist_mutex);
- -      list_for_each_entry(l, &cgrp->pidlists, links) {
- -              if (l->key.type == type && l->key.ns == ns) {
- -                      /* make sure l doesn't vanish out from under us */
- -                      down_write(&l->rwsem);
- -                      mutex_unlock(&cgrp->pidlist_mutex);
- -                      return l;
- -              }
- -      }
+ +      lockdep_assert_held(&cgrp->pidlist_mutex);
+ +
+ +      l = cgroup_pidlist_find(cgrp, type);
+ +      if (l)
+ +              return l;
+ +
         /* entry not found; create a new one */
         l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
- -      if (!l) {
- -              mutex_unlock(&cgrp->pidlist_mutex);
+ +      if (!l)
                 return l;
- -      }
- -      init_rwsem(&l->rwsem);
- -      down_write(&l->rwsem);
+ +
+ +      INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
         l->key.type = type;
- -      l->key.ns = get_pid_ns(ns);
+ +      /* don't need task_nsproxy() if we're looking at ourself */
+ +      l->key.ns = get_pid_ns(task_active_pid_ns(current));
         l->owner = cgrp;
         list_add(&l->links, &cgrp->pidlists);
- -      mutex_unlock(&cgrp->pidlist_mutex);
         return l;
   }
   
@@@ -3597,8 -3617,6 +3597,8 @@@ static int pidlist_array_load(struct cg
         struct task_struct *tsk;
         struct cgroup_pidlist *l;
   
+ +      lockdep_assert_held(&cgrp->pidlist_mutex);
+ +
         /*
          * If cgroup gets more users after we read count, we won't have
          * enough space - tough.  This race is indistinguishable to the
@@@ -3625,24 -3643,20 +3625,24 @@@
         css_task_iter_end(&it);
         length = n;
         /* now sort & (if procs) strip out duplicates */
- -      sort(array, length, sizeof(pid_t), cmppid, NULL);
+ +      if (cgroup_sane_behavior(cgrp))
+ +              sort(array, length, sizeof(pid_t), fried_cmppid, NULL);
+ +      else
+ +              sort(array, length, sizeof(pid_t), cmppid, NULL);
         if (type == CGROUP_FILE_PROCS)
                 length = pidlist_uniq(array, length);
- -      l = cgroup_pidlist_find(cgrp, type);
+ +
+ +      l = cgroup_pidlist_find_create(cgrp, type);
         if (!l) {
+ +              mutex_unlock(&cgrp->pidlist_mutex);
                 pidlist_free(array);
                 return -ENOMEM;
         }
- -      /* store array, freeing old if necessary - lock already held */
+ +
+ +      /* store array, freeing old if necessary */
         pidlist_free(l->list);
         l->list = array;
         l->length = length;
- -      l->use_count++;
- -      up_write(&l->rwsem);
         *lp = l;
         return 0;
   }
@@@ -3716,45 -3730,20 +3716,45 @@@ static void *cgroup_pidlist_start(struc
          * after a seek to the start). Use a binary-search to find the
          * next pid to display, if any
          */
- -      struct cgroup_pidlist *l = s->private;
+ +      struct cgroup_open_file *of = s->private;
+ +      struct cgroup *cgrp = seq_css(s)->cgroup;
+ +      struct cgroup_pidlist *l;
+ +      enum cgroup_filetype type = seq_cft(s)->private;
         int index = 0, pid = *pos;
- -      int *iter;
+ +      int *iter, ret;
+ +
+ +      mutex_lock(&cgrp->pidlist_mutex);
+ +
+ +      /*
+ +       * !NULL @of->priv indicates that this isn't the first start()
+ +       * after open.  If the matching pidlist is around, we can use that.
+ +       * Look for it.  Note that @of->priv can't be used directly.  It
+ +       * could already have been destroyed.
+ +       */
+ +      if (of->priv)
+ +              of->priv = cgroup_pidlist_find(cgrp, type);
+ +
+ +      /*
+ +       * Either this is the first start() after open or the matching
+ +       * pidlist has been destroyed in between.  Create a new one.
+ +       */
+ +      if (!of->priv) {
+ +              ret = pidlist_array_load(cgrp, type,
+ +                                       (struct cgroup_pidlist **)&of->priv);
+ +              if (ret)
+ +                      return ERR_PTR(ret);
+ +      }
+ +      l = of->priv;
   
- -      down_read(&l->rwsem);
         if (pid) {
                 int end = l->length;
   
                 while (index < end) {
                         int mid = (index + end) / 2;
- -                      if (l->list[mid] == pid) {
+ +                      if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) {
                                 index = mid;
                                 break;
- -                      } else if (l->list[mid] <= pid)
+ +                      } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid)
                                 index = mid + 1;
                         else
                                 end = mid;
@@@ -3765,25 -3754,19 +3765,25 @@@
                 return NULL;
         /* Update the abstract position to be the actual pid that we found */
         iter = l->list + index;
- -      *pos = *iter;
+ +      *pos = cgroup_pid_fry(cgrp, *iter);
         return iter;
   }
   
   static void cgroup_pidlist_stop(struct seq_file *s, void *v)
   {
- -      struct cgroup_pidlist *l = s->private;
- -      up_read(&l->rwsem);
+ +      struct cgroup_open_file *of = s->private;
+ +      struct cgroup_pidlist *l = of->priv;
+ +
+ +      if (l)
+ +              mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
+ +                               CGROUP_PIDLIST_DESTROY_DELAY);
+ +      mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
   }
   
   static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
   {
- -      struct cgroup_pidlist *l = s->private;
+ +      struct cgroup_open_file *of = s->private;
+ +      struct cgroup_pidlist *l = of->priv;
         pid_t *p = v;
         pid_t *end = l->list + l->length;
         /*
@@@ -3794,7 -3777,7 +3794,7 @@@
         if (p >= end) {
                 return NULL;
         } else {
- -              *pos = *p;
+ +              *pos = cgroup_pid_fry(seq_css(s)->cgroup, *p);
                 return p;
         }
   }
@@@ -3815,6 -3798,92 +3815,6 @@@ static const struct seq_operations cgro
         .show = cgroup_pidlist_show,
   };
   
- -static void cgroup_release_pid_array(struct cgroup_pidlist *l)
- -{
- -      /*
- -       * the case where we're the last user of this particular pidlist will
- -       * have us remove it from the cgroup's list, which entails taking the
- -       * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
- -       * pidlist_mutex, we have to take pidlist_mutex first.
- -       */
- -      mutex_lock(&l->owner->pidlist_mutex);
- -      down_write(&l->rwsem);
- -      BUG_ON(!l->use_count);
- -      if (!--l->use_count) {
- -              /* we're the last user if refcount is 0; remove and free */
- -              list_del(&l->links);
- -              mutex_unlock(&l->owner->pidlist_mutex);
- -              pidlist_free(l->list);
- -              put_pid_ns(l->key.ns);
- -              up_write(&l->rwsem);
- -              kfree(l);
- -              return;
- -      }
- -      mutex_unlock(&l->owner->pidlist_mutex);
- -      up_write(&l->rwsem);
- -}
- -
- -static int cgroup_pidlist_release(struct inode *inode, struct file *file)
- -{
- -      struct cgroup_pidlist *l;
- -      if (!(file->f_mode & FMODE_READ))
- -              return 0;
- -      /*
- -       * the seq_file will only be initialized if the file was opened for
- -       * reading; hence we check if it's not null only in that case.
- -       */
- -      l = ((struct seq_file *)file->private_data)->private;
- -      cgroup_release_pid_array(l);
- -      return seq_release(inode, file);
- -}
- -
- -static const struct file_operations cgroup_pidlist_operations = {
- -      .read = seq_read,
- -      .llseek = seq_lseek,
- -      .write = cgroup_file_write,
- -      .release = cgroup_pidlist_release,
- -};
- -
- -/*
- - * The following functions handle opens on a file that displays a pidlist
- - * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
- - * in the cgroup.
- - */
- -/* helper function for the two below it */
- -static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
- -{
- -      struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- -      struct cgroup_pidlist *l;
- -      int retval;
- -
- -      /* Nothing to do for write-only files */
- -      if (!(file->f_mode & FMODE_READ))
- -              return 0;
- -
- -      /* have the array populated */
- -      retval = pidlist_array_load(cgrp, type, &l);
- -      if (retval)
- -              return retval;
- -      /* configure file information */
- -      file->f_op = &cgroup_pidlist_operations;
- -
- -      retval = seq_open(file, &cgroup_pidlist_seq_operations);
- -      if (retval) {
- -              cgroup_release_pid_array(l);
- -              return retval;
- -      }
- -      ((struct seq_file *)file->private_data)->private = l;
- -      return 0;
- -}
- -static int cgroup_tasks_open(struct inode *unused, struct file *file)
- -{
- -      return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
- -}
- -static int cgroup_procs_open(struct inode *unused, struct file *file)
- -{
- -      return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
- -}
- -
   static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
                                          struct cftype *cft)
   {
@@@ -3849,6 -3918,202 +3849,6 @@@ static void cgroup_dput(struct cgroup *
         deactivate_super(sb);
   }
   
- -/*
- - * Unregister event and free resources.
- - *
- - * Gets called from workqueue.
- - */
- -static void cgroup_event_remove(struct work_struct *work)
- -{
- -      struct cgroup_event *event = container_of(work, struct cgroup_event,
- -                      remove);
- -      struct cgroup_subsys_state *css = event->css;
- -
- -      remove_wait_queue(event->wqh, &event->wait);
- -
- -      event->cft->unregister_event(css, event->cft, event->eventfd);
- -
- -      /* Notify userspace the event is going away. */
- -      eventfd_signal(event->eventfd, 1);
- -
- -      eventfd_ctx_put(event->eventfd);
- -      kfree(event);
- -      css_put(css);
- -}
- -
- -/*
- - * Gets called on POLLHUP on eventfd when user closes it.
- - *
- - * Called with wqh->lock held and interrupts disabled.
- - */
- -static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
- -              int sync, void *key)
- -{
- -      struct cgroup_event *event = container_of(wait,
- -                      struct cgroup_event, wait);
- -      struct cgroup *cgrp = event->css->cgroup;
- -      unsigned long flags = (unsigned long)key;
- -
- -      if (flags & POLLHUP) {
- -              /*
- -               * If the event has been detached at cgroup removal, we
- -               * can simply return knowing the other side will cleanup
- -               * for us.
- -               *
- -               * We can't race against event freeing since the other
- -               * side will require wqh->lock via remove_wait_queue(),
- -               * which we hold.
- -               */
- -              spin_lock(&cgrp->event_list_lock);
- -              if (!list_empty(&event->list)) {
- -                      list_del_init(&event->list);
- -                      /*
- -                       * We are in atomic context, but cgroup_event_remove()
- -                       * may sleep, so we have to call it in workqueue.
- -                       */
- -                      schedule_work(&event->remove);
- -              }
- -              spin_unlock(&cgrp->event_list_lock);
- -      }
- -
- -      return 0;
- -}
- -
- -static void cgroup_event_ptable_queue_proc(struct file *file,
- -              wait_queue_head_t *wqh, poll_table *pt)
- -{
- -      struct cgroup_event *event = container_of(pt,
- -                      struct cgroup_event, pt);
- -
- -      event->wqh = wqh;
- -      add_wait_queue(wqh, &event->wait);
- -}
- -
- -/*
- - * Parse input and register new cgroup event handler.
- - *
- - * Input must be in format '<event_fd> <control_fd> <args>'.
- - * Interpretation of args is defined by control file implementation.
- - */
- -static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
- -                                    struct cftype *cft, const char *buffer)
- -{
- -      struct cgroup *cgrp = dummy_css->cgroup;
- -      struct cgroup_event *event;
- -      struct cgroup_subsys_state *cfile_css;
- -      unsigned int efd, cfd;
- -      struct fd efile;
- -      struct fd cfile;
- -      char *endp;
- -      int ret;
- -
- -      efd = simple_strtoul(buffer, &endp, 10);
- -      if (*endp != ' ')
- -              return -EINVAL;
- -      buffer = endp + 1;
- -
- -      cfd = simple_strtoul(buffer, &endp, 10);
- -      if ((*endp != ' ') && (*endp != '\0'))
- -              return -EINVAL;
- -      buffer = endp + 1;
- -
- -      event = kzalloc(sizeof(*event), GFP_KERNEL);
- -      if (!event)
- -              return -ENOMEM;
- -
- -      INIT_LIST_HEAD(&event->list);
- -      init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
- -      init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
- -      INIT_WORK(&event->remove, cgroup_event_remove);
- -
- -      efile = fdget(efd);
- -      if (!efile.file) {
- -              ret = -EBADF;
- -              goto out_kfree;
- -      }
- -
- -      event->eventfd = eventfd_ctx_fileget(efile.file);
- -      if (IS_ERR(event->eventfd)) {
- -              ret = PTR_ERR(event->eventfd);
- -              goto out_put_efile;
- -      }
- -
- -      cfile = fdget(cfd);
- -      if (!cfile.file) {
- -              ret = -EBADF;
- -              goto out_put_eventfd;
- -      }
- -
- -      /* the process need read permission on control file */
- -      /* AV: shouldn't we check that it's been opened for read instead? */
- -      ret = inode_permission(file_inode(cfile.file), MAY_READ);
- -      if (ret < 0)
- -              goto out_put_cfile;
- -
- -      event->cft = __file_cft(cfile.file);
- -      if (IS_ERR(event->cft)) {
- -              ret = PTR_ERR(event->cft);
- -              goto out_put_cfile;
- -      }
- -
- -      if (!event->cft->ss) {
- -              ret = -EBADF;
- -              goto out_put_cfile;
- -      }
- -
- -      /*
- -       * Determine the css of @cfile, verify it belongs to the same
- -       * cgroup as cgroup.event_control, and associate @event with it.
- -       * Remaining events are automatically removed on cgroup destruction
- -       * but the removal is asynchronous, so take an extra ref.
- -       */
- -      rcu_read_lock();
- -
- -      ret = -EINVAL;
- -      event->css = cgroup_css(cgrp, event->cft->ss);
- -      cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
- -      if (event->css && event->css == cfile_css && css_tryget(event->css))
- -              ret = 0;
- -
- -      rcu_read_unlock();
- -      if (ret)
- -              goto out_put_cfile;
- -
- -      if (!event->cft->register_event || !event->cft->unregister_event) {
- -              ret = -EINVAL;
- -              goto out_put_css;
- -      }
- -
- -      ret = event->cft->register_event(event->css, event->cft,
- -                      event->eventfd, buffer);
- -      if (ret)
- -              goto out_put_css;
- -
- -      efile.file->f_op->poll(efile.file, &event->pt);
- -
- -      spin_lock(&cgrp->event_list_lock);
- -      list_add(&event->list, &cgrp->event_list);
- -      spin_unlock(&cgrp->event_list_lock);
- -
- -      fdput(cfile);
- -      fdput(efile);
- -
- -      return 0;
- -
- -out_put_css:
- -      css_put(event->css);
- -out_put_cfile:
- -      fdput(cfile);
- -out_put_eventfd:
- -      eventfd_ctx_put(event->eventfd);
- -out_put_efile:
- -      fdput(efile);
- -out_kfree:
- -      kfree(event);
- -
- -      return ret;
- -}
- -
   static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
                                       struct cftype *cft)
   {
@@@ -3868,15 -4133,17 +3868,15 @@@ static int cgroup_clone_children_write(
   static struct cftype cgroup_base_files[] = {
         {
                 .name = "cgroup.procs",
- -              .open = cgroup_procs_open,
+ +              .seq_start = cgroup_pidlist_start,
+ +              .seq_next = cgroup_pidlist_next,
+ +              .seq_stop = cgroup_pidlist_stop,
+ +              .seq_show = cgroup_pidlist_show,
+ +              .private = CGROUP_FILE_PROCS,
                 .write_u64 = cgroup_procs_write,
- -              .release = cgroup_pidlist_release,
                 .mode = S_IRUGO | S_IWUSR,
         },
         {
- -              .name = "cgroup.event_control",
- -              .write_string = cgroup_write_event_control,
- -              .mode = S_IWUGO,
- -      },
- -      {
                 .name = "cgroup.clone_children",
                 .flags = CFTYPE_INSANE,
                 .read_u64 = cgroup_clone_children_read,
@@@ -3885,7 -4152,7 +3885,7 @@@
         {
                 .name = "cgroup.sane_behavior",
                 .flags = CFTYPE_ONLY_ON_ROOT,
- -              .read_seq_string = cgroup_sane_behavior_show,
+ +              .seq_show = cgroup_sane_behavior_show,
         },
   
         /*
@@@ -3896,12 -4163,9 +3896,12 @@@
         {
                 .name = "tasks",
                 .flags = CFTYPE_INSANE,         /* use "procs" instead */
- -              .open = cgroup_tasks_open,
+ +              .seq_start = cgroup_pidlist_start,
+ +              .seq_next = cgroup_pidlist_next,
+ +              .seq_stop = cgroup_pidlist_stop,
+ +              .seq_show = cgroup_pidlist_show,
+ +              .private = CGROUP_FILE_TASKS,
                 .write_u64 = cgroup_tasks_write,
- -              .release = cgroup_pidlist_release,
                 .mode = S_IRUGO | S_IWUSR,
         },
         {
@@@ -3913,7 -4177,7 +3913,7 @@@
         {
                 .name = "release_agent",
                 .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
- -              .read_seq_string = cgroup_release_agent_show,
+ +              .seq_show = cgroup_release_agent_show,
                 .write_string = cgroup_release_agent_write,
                 .max_write_len = PATH_MAX,
         },
@@@ -4162,14 -4426,6 +4162,6 @@@ static long cgroup_create(struct cgrou
         list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
         root->number_of_cgroups++;
   
-       /* each css holds a ref to the cgroup's dentry and the parent css */
-       for_each_root_subsys(root, ss) {
-               struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
- 
-               dget(dentry);
-               css_get(css->parent);
-       }
- 
         /* hold a ref to the parent's dentry */
         dget(parent->dentry);
   
@@@ -4181,6 -4437,13 +4173,13 @@@
                 if (err)
                         goto err_destroy;
   
+               /* each css holds a ref to the cgroup's dentry and parent css */
+               dget(dentry);
+               css_get(css->parent);
+ 
+               /* mark it consumed for error path */
+               css_ar[ss->subsys_id] = NULL;
+ 
                 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
                     parent->parent) {
                         pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
@@@ -4227,6 -4490,14 +4226,14 @@@ err_free_cgrp
         return err;
   
   err_destroy:
+       for_each_root_subsys(root, ss) {
+               struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
+ 
+               if (css) {
+                       percpu_ref_cancel_init(&css->refcnt);
+                       ss->css_free(css);
+               }
+       }
         cgroup_destroy_locked(cgrp);
         mutex_unlock(&cgroup_mutex);
         mutex_unlock(&dentry->d_inode->i_mutex);
@@@ -4349,6 -4620,7 +4356,6 @@@ static int cgroup_destroy_locked(struc
         __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
   {
         struct dentry *d = cgrp->dentry;
- -      struct cgroup_event *event, *tmp;
         struct cgroup_subsys *ss;
         struct cgroup *child;
         bool empty;
@@@ -4387,8 -4659,12 +4394,12 @@@
          * will be invoked to perform the rest of destruction once the
          * percpu refs of all css's are confirmed to be killed.
          */
-       for_each_root_subsys(cgrp->root, ss)
-               kill_css(cgroup_css(cgrp, ss));
+       for_each_root_subsys(cgrp->root, ss) {
+               struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+ 
+               if (css)
+                       kill_css(css);
+       }
   
         /*
          * Mark @cgrp dead.  This prevents further task migration and child
@@@ -4423,6 -4699,18 +4434,6 @@@
         dget(d);
         cgroup_d_remove_dir(d);
   
- -      /*
- -       * Unregister events and notify userspace.
- -       * Notify userspace about cgroup removing only after rmdir of cgroup
- -       * directory to avoid race between userspace and kernelspace.
- -       */
- -      spin_lock(&cgrp->event_list_lock);
- -      list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
- -              list_del_init(&event->list);
- -              schedule_work(&event->remove);
- -      }
- -      spin_unlock(&cgrp->event_list_lock);
- -
         return 0;
   };
   
@@@ -4809,15 -5097,6 +4820,15 @@@ static int __init cgroup_wq_init(void
          */
         cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
         BUG_ON(!cgroup_destroy_wq);
+ +
+ +      /*
+ +       * Used to destroy pidlists and separate to serve as flush domain.
+ +       * Cap @max_active to 1 too.
+ +       */
+ +      cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
+ +                                                  0, 1);
+ +      BUG_ON(!cgroup_pidlist_destroy_wq);
+ +
         return 0;
   }
   core_initcall(cgroup_wq_init);
@@@ -5288,7 -5567,9 +5299,7 @@@ static u64 current_css_set_refcount_rea
         return count;
   }
   
- -static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
- -                                       struct cftype *cft,
- -                                       struct seq_file *seq)
+ +static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
   {
         struct cgrp_cset_link *link;
         struct css_set *cset;
@@@ -5313,9 -5594,9 +5324,9 @@@
   }
   
   #define MAX_TASKS_SHOWN_PER_CSS 25
- -static int cgroup_css_links_read(struct cgroup_subsys_state *css,
- -                               struct cftype *cft, struct seq_file *seq)
+ +static int cgroup_css_links_read(struct seq_file *seq, void *v)
   {
+ +      struct cgroup_subsys_state *css = seq_css(seq);
         struct cgrp_cset_link *link;
   
         read_lock(&css_set_lock);
@@@ -5361,12 -5642,12 +5372,12 @@@ static struct cftype debug_files[] =  
   
         {
                 .name = "current_css_set_cg_links",
- -              .read_seq_string = current_css_set_cg_links_read,
+ +              .seq_show = current_css_set_cg_links_read,
         },
   
         {
                 .name = "cgroup_css_links",
- -              .read_seq_string = cgroup_css_links_read,
+ +              .seq_show = cgroup_css_links_read,
         },
   
         {
author	Tejun Heo <tj@kernel.org>
	Fri, 6 Dec 2013 20:09:27 +0000 (15:09 -0500)
committer	Tejun Heo <tj@kernel.org>
	Fri, 6 Dec 2013 20:09:27 +0000 (15:09 -0500)