Merge branch 'stable-4.10' of git://git.infradead.org/users/pcmoore/audit

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 14 Dec 2016 22:06:40 +0000 (14:06 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 14 Dec 2016 22:06:40 +0000 (14:06 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 14 Dec 2016 22:06:40 +0000 (14:06 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 14 Dec 2016 22:06:40 +0000 (14:06 -0800)
diff --combined fs/proc/base.c

index 2e5b2e3,9cdb3e4..5ea8363
--- 1/fs/proc/base.c
--- 2/fs/proc/base.c
+++ b/fs/proc/base.c
@@@ -104,12 -104,9 +104,12 @@@
    *    in /proc for a task before it execs a suid executable.
    */
   
+ +static u8 nlink_tid;
+ +static u8 nlink_tgid;
+ +
   struct pid_entry {
         const char *name;
- -      int len;
+ +      unsigned int len;
         umode_t mode;
         const struct inode_operations *iop;
         const struct file_operations *fop;
@@@ -142,13 -139,13 +142,13 @@@
    * Count the number of hardlinks for the pid_entry table, excluding the .
    * and .. links.
    */
- -static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
+ +static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
         unsigned int n)
   {
         unsigned int i;
         unsigned int count;
   
- -      count = 0;
+ +      count = 2;
         for (i = 0; i < n; ++i) {
                 if (S_ISDIR(entries[i].mode))
                         ++count;
@@@ -403,6 -400,23 +403,6 @@@ static const struct file_operations pro
         .llseek = generic_file_llseek,
   };
   
- -static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
- -                       struct pid *pid, struct task_struct *task)
- -{
- -      struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
- -      if (mm && !IS_ERR(mm)) {
- -              unsigned int nwords = 0;
- -              do {
- -                      nwords += 2;
- -              } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
- -              seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
- -              mmput(mm);
- -              return 0;
- -      } else
- -              return PTR_ERR(mm);
- -}
- -
- -
   #ifdef CONFIG_KALLSYMS
   /*
    * Provides a wchan file via kallsyms in a proper one-value-per-file format.
@@@ -469,7 -483,7 +469,7 @@@ static int proc_pid_stack(struct seq_fi
                 save_stack_trace_tsk(task, &trace);
   
                 for (i = 0; i < trace.nr_entries; i++) {
- -                      seq_printf(m, "[<%pK>] %pS\n",
+ +                      seq_printf(m, "[<%pK>] %pB\n",
                                    (void *)entries[i], (void *)entries[i]);
                 }
                 unlock_trace(task);
@@@ -695,7 -709,7 +695,7 @@@ int proc_setattr(struct dentry *dentry
         if (attr->ia_valid & ATTR_MODE)
                 return -EPERM;
   
- -      error = inode_change_ok(inode, attr);
+ +      error = setattr_prepare(dentry, attr);
         if (error)
                 return error;
   
@@@ -835,7 -849,6 +835,7 @@@ static ssize_t mem_rw(struct file *file
         unsigned long addr = *ppos;
         ssize_t copied;
         char *page;
+ +      unsigned int flags;
   
         if (!mm)
                 return 0;
@@@ -848,11 -861,6 +848,11 @@@
         if (!atomic_inc_not_zero(&mm->mm_users))
                 goto free;
   
+ +      /* Maybe we should limit FOLL_FORCE to actual ptrace users? */
+ +      flags = FOLL_FORCE;
+ +      if (write)
+ +              flags |= FOLL_WRITE;
+ +
         while (count > 0) {
                 int this_len = min_t(int, count, PAGE_SIZE);
   
@@@ -861,7 -869,7 +861,7 @@@
                         break;
                 }
   
- -              this_len = access_remote_vm(mm, addr, page, this_len, write);
+ +              this_len = access_remote_vm(mm, addr, page, this_len, flags);
                 if (!this_len) {
                         if (!copied)
                                 copied = -EIO;
@@@ -973,7 -981,8 +973,7 @@@ static ssize_t environ_read(struct fil
                 max_len = min_t(size_t, PAGE_SIZE, count);
                 this_len = min(max_len, this_len);
   
- -              retval = access_remote_vm(mm, (env_start + src),
- -                      page, this_len, 0);
+ +              retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
   
                 if (retval <= 0) {
                         ret = retval;
@@@ -1005,33 -1014,6 +1005,33 @@@ static const struct file_operations pro
         .release        = mem_release,
   };
   
+ +static int auxv_open(struct inode *inode, struct file *file)
+ +{
+ +      return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+ +}
+ +
+ +static ssize_t auxv_read(struct file *file, char __user *buf,
+ +                      size_t count, loff_t *ppos)
+ +{
+ +      struct mm_struct *mm = file->private_data;
+ +      unsigned int nwords = 0;
+ +
+ +      if (!mm)
+ +              return 0;
+ +      do {
+ +              nwords += 2;
+ +      } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
+ +      return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
+ +                                     nwords * sizeof(mm->saved_auxv[0]));
+ +}
+ +
+ +static const struct file_operations proc_auxv_operations = {
+ +      .open           = auxv_open,
+ +      .read           = auxv_read,
+ +      .llseek         = generic_file_llseek,
+ +      .release        = mem_release,
+ +};
+ +
   static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
                             loff_t *ppos)
   {
@@@ -1246,7 -1228,7 +1246,7 @@@ static const struct file_operations pro
   };
   
   #ifdef CONFIG_AUDITSYSCALL
- #define TMPBUFLEN 21
+ #define TMPBUFLEN 11
   static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
                                   size_t count, loff_t *ppos)
   {
@@@ -1667,8 -1649,7 +1667,8 @@@ const struct inode_operations proc_pid_
   
   /* building an inode */
   
- -struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
+ +struct inode *proc_pid_make_inode(struct super_block * sb,
+ +                                struct task_struct *task, umode_t mode)
   {
         struct inode * inode;
         struct proc_inode *ei;
@@@ -1682,9 -1663,8 +1682,9 @@@
   
         /* Common stuff */
         ei = PROC_I(inode);
+ +      inode->i_mode = mode;
         inode->i_ino = get_next_ino();
- -      inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ +      inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
         inode->i_op = &proc_def_inode_operations;
   
         /*
@@@ -1972,7 -1952,7 +1972,7 @@@ out
   
   struct map_files_info {
         fmode_t         mode;
- -      unsigned long   len;
+ +      unsigned int    len;
         unsigned char   name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
   };
   
@@@ -2009,9 -1989,7 +2009,9 @@@ proc_map_files_instantiate(struct inod
         struct proc_inode *ei;
         struct inode *inode;
   
- -      inode = proc_pid_make_inode(dir->i_sb, task);
+ +      inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK |
+ +                                  ((mode & FMODE_READ ) ? S_IRUSR : 0) |
+ +                                  ((mode & FMODE_WRITE) ? S_IWUSR : 0));
         if (!inode)
                 return -ENOENT;
   
@@@ -2020,6 -1998,12 +2020,6 @@@
   
         inode->i_op = &proc_map_files_link_inode_operations;
         inode->i_size = 64;
- -      inode->i_mode = S_IFLNK;
- -
- -      if (mode & FMODE_READ)
- -              inode->i_mode |= S_IRUSR;
- -      if (mode & FMODE_WRITE)
- -              inode->i_mode |= S_IWUSR;
   
         d_set_d_op(dentry, &tid_map_files_dentry_operations);
         d_add(dentry, inode);
@@@ -2296,27 -2280,16 +2296,27 @@@ static ssize_t timerslack_ns_write(stru
         if (!p)
                 return -ESRCH;
   
- -      if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
- -              task_lock(p);
- -              if (slack_ns == 0)
- -                      p->timer_slack_ns = p->default_timer_slack_ns;
- -              else
- -                      p->timer_slack_ns = slack_ns;
- -              task_unlock(p);
- -      } else
- -              count = -EPERM;
+ +      if (p != current) {
+ +              if (!capable(CAP_SYS_NICE)) {
+ +                      count = -EPERM;
+ +                      goto out;
+ +              }
   
+ +              err = security_task_setscheduler(p);
+ +              if (err) {
+ +                      count = err;
+ +                      goto out;
+ +              }
+ +      }
+ +
+ +      task_lock(p);
+ +      if (slack_ns == 0)
+ +              p->timer_slack_ns = p->default_timer_slack_ns;
+ +      else
+ +              p->timer_slack_ns = slack_ns;
+ +      task_unlock(p);
+ +
+ +out:
         put_task_struct(p);
   
         return count;
@@@ -2326,28 -2299,19 +2326,28 @@@ static int timerslack_ns_show(struct se
   {
         struct inode *inode = m->private;
         struct task_struct *p;
- -      int err =  0;
+ +      int err = 0;
   
         p = get_proc_task(inode);
         if (!p)
                 return -ESRCH;
   
- -      if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
- -              task_lock(p);
- -              seq_printf(m, "%llu\n", p->timer_slack_ns);
- -              task_unlock(p);
- -      } else
- -              err = -EPERM;
+ +      if (p != current) {
+ +
+ +              if (!capable(CAP_SYS_NICE)) {
+ +                      err = -EPERM;
+ +                      goto out;
+ +              }
+ +              err = security_task_getscheduler(p);
+ +              if (err)
+ +                      goto out;
+ +      }
+ +
+ +      task_lock(p);
+ +      seq_printf(m, "%llu\n", p->timer_slack_ns);
+ +      task_unlock(p);
   
+ +out:
         put_task_struct(p);
   
         return err;
@@@ -2373,11 -2337,12 +2373,11 @@@ static int proc_pident_instantiate(stru
         struct inode *inode;
         struct proc_inode *ei;
   
- -      inode = proc_pid_make_inode(dir->i_sb, task);
+ +      inode = proc_pid_make_inode(dir->i_sb, task, p->mode);
         if (!inode)
                 goto out;
   
         ei = PROC_I(inode);
- -      inode->i_mode = p->mode;
         if (S_ISDIR(inode->i_mode))
                 set_nlink(inode, 2);    /* Use getattr to fix if necessary */
         if (p->iop)
@@@ -2412,14 -2377,14 +2412,14 @@@ static struct dentry *proc_pident_looku
          * Yes, it does not scale. And it should not. Don't add
          * new entries into /proc/<tgid>/ without very good reasons.
          */
- -      last = &ents[nents - 1];
- -      for (p = ents; p <= last; p++) {
+ +      last = &ents[nents];
+ +      for (p = ents; p < last; p++) {
                 if (p->len != dentry->d_name.len)
                         continue;
                 if (!memcmp(dentry->d_name.name, p->name, p->len))
                         break;
         }
- -      if (p > last)
+ +      if (p >= last)
                 goto out;
   
         error = proc_pident_instantiate(dir, dentry, task, p);
@@@ -2444,7 -2409,7 +2444,7 @@@ static int proc_pident_readdir(struct f
         if (ctx->pos >= nents + 2)
                 goto out;
   
- -      for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) {
+ +      for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
                 if (!proc_fill_cache(file, ctx, p->name, p->len,
                                 proc_pident_instantiate, task, p))
                         break;
@@@ -2857,7 -2822,7 +2857,7 @@@ static const struct pid_entry tgid_base
         DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
   #endif
         REG("environ",    S_IRUSR, proc_environ_operations),
- -      ONE("auxv",       S_IRUSR, proc_pid_auxv),
+ +      REG("auxv",       S_IRUSR, proc_auxv_operations),
         ONE("status",     S_IRUGO, proc_pid_status),
         ONE("personality", S_IRUSR, proc_pid_personality),
         ONE("limits",     S_IRUGO, proc_pid_limits),
@@@ -3059,15 -3024,17 +3059,15 @@@ static int proc_pid_instantiate(struct 
   {
         struct inode *inode;
   
- -      inode = proc_pid_make_inode(dir->i_sb, task);
+ +      inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
         if (!inode)
                 goto out;
   
- -      inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
         inode->i_op = &proc_tgid_base_inode_operations;
         inode->i_fop = &proc_tgid_base_operations;
         inode->i_flags|=S_IMMUTABLE;
   
- -      set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
- -                                                ARRAY_SIZE(tgid_base_stuff)));
+ +      set_nlink(inode, nlink_tgid);
   
         d_set_d_op(dentry, &pid_dentry_operations);
   
@@@ -3243,7 -3210,7 +3243,7 @@@ static const struct pid_entry tid_base_
         DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
   #endif
         REG("environ",   S_IRUSR, proc_environ_operations),
- -      ONE("auxv",      S_IRUSR, proc_pid_auxv),
+ +      REG("auxv",      S_IRUSR, proc_auxv_operations),
         ONE("status",    S_IRUGO, proc_pid_status),
         ONE("personality", S_IRUSR, proc_pid_personality),
         ONE("limits",    S_IRUGO, proc_pid_limits),
@@@ -3350,15 -3317,17 +3350,15 @@@ static int proc_task_instantiate(struc
         struct dentry *dentry, struct task_struct *task, const void *ptr)
   {
         struct inode *inode;
- -      inode = proc_pid_make_inode(dir->i_sb, task);
+ +      inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
   
         if (!inode)
                 goto out;
- -      inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
         inode->i_op = &proc_tid_base_inode_operations;
         inode->i_fop = &proc_tid_base_operations;
         inode->i_flags|=S_IMMUTABLE;
   
- -      set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
- -                                                ARRAY_SIZE(tid_base_stuff)));
+ +      set_nlink(inode, nlink_tid);
   
         d_set_d_op(dentry, &pid_dentry_operations);
   
@@@ -3548,9 -3517,3 +3548,9 @@@ static const struct file_operations pro
         .iterate_shared = proc_task_readdir,
         .llseek         = generic_file_llseek,
   };
+ +
+ +void __init set_proc_pid_nlink(void)
+ +{
+ +      nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
+ +      nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
+ +}
diff --combined include/uapi/linux/audit.h

index 208df7b,c8dc97b..1c107cb
--- 1/include/uapi/linux/audit.h
--- 2/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@@ -254,6 -254,7 +254,7 @@@
   #define AUDIT_OBJ_LEV_LOW     22
   #define AUDIT_OBJ_LEV_HIGH    23
   #define AUDIT_LOGINUID_SET    24
+ #define AUDIT_SESSIONID       25      /* Session ID */
   
                                 /* These are ONLY useful when checking
                                  * at syscall exit time (AUDIT_AT_EXIT). */
@@@ -329,11 -330,11 +330,13 @@@ enum 
   #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT    0x00000001
   #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME        0x00000002
   #define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH  0x00000004
+ +#define AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND   0x00000008
+ #define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010
   #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
                                   AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
                                   AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH | \
-                                 AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND)
++                                AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
+                                 AUDIT_FEATURE_BITMAP_SESSIONID_FILTER)
   
   /* deprecated: AUDIT_VERSION_* */
   #define AUDIT_VERSION_LATEST          AUDIT_FEATURE_BITMAP_ALL
diff --combined kernel/audit.c

index 67b9fbd,4101768..91bff3c
--- 1/kernel/audit.c
--- 2/kernel/audit.c
+++ b/kernel/audit.c
@@@ -107,7 -107,6 +107,6 @@@ static u32 audit_rate_limit
    * When set to zero, this means unlimited. */
   static u32    audit_backlog_limit = 64;
   #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
- static u32    audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
   static u32    audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
   
   /* The identity of the user shutting down the audit system. */
@@@ -126,7 -125,7 +125,7 @@@ static atomic_t    audit_lost = ATOMIC_
   
   /* The netlink socket. */
   static struct sock *audit_sock;
- -static int audit_net_id;
+ +static unsigned int audit_net_id;
   
   /* Hash for inode-based rules */
   struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@@@ -138,11 -137,18 +137,18 @@@ static DEFINE_SPINLOCK(audit_freelist_l
   static int       audit_freelist_count;
   static LIST_HEAD(audit_freelist);
   
- static struct sk_buff_head audit_skb_queue;
- /* queue of skbs to send to auditd when/if it comes back */
- static struct sk_buff_head audit_skb_hold_queue;
+ /* queue msgs to send via kauditd_task */
+ static struct sk_buff_head audit_queue;
+ /* queue msgs due to temporary unicast send problems */
+ static struct sk_buff_head audit_retry_queue;
+ /* queue msgs waiting for new auditd connection */
+ static struct sk_buff_head audit_hold_queue;
+ 
+ /* queue servicing thread */
   static struct task_struct *kauditd_task;
   static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
+ 
+ /* waitqueue for callers who are blocked on the audit backlog */
   static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
   
   static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
@@@ -338,7 -344,7 +344,7 @@@ static int audit_set_backlog_limit(u32 
   static int audit_set_backlog_wait_time(u32 timeout)
   {
         return audit_do_config_change("audit_backlog_wait_time",
-                                     &audit_backlog_wait_time_master, timeout);
+                                     &audit_backlog_wait_time, timeout);
   }
   
   static int audit_set_enabled(u32 state)
@@@ -365,29 -371,10 +371,10 @@@ static int audit_set_failure(u32 state
   }
   
   /*
-  * Queue skbs to be sent to auditd when/if it comes back.  These skbs should
-  * already have been sent via prink/syslog and so if these messages are dropped
-  * it is not a huge concern since we already passed the audit_log_lost()
-  * notification and stuff.  This is just nice to get audit messages during
-  * boot before auditd is running or messages generated while auditd is stopped.
-  * This only holds messages is audit_default is set, aka booting with audit=1
-  * or building your kernel that way.
-  */
- static void audit_hold_skb(struct sk_buff *skb)
- {
-       if (audit_default &&
-           (!audit_backlog_limit ||
-            skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit))
-               skb_queue_tail(&audit_skb_hold_queue, skb);
-       else
-               kfree_skb(skb);
- }
- 
- /*
    * For one reason or another this nlh isn't getting delivered to the userspace
    * audit daemon, just send it to printk.
    */
- static void audit_printk_skb(struct sk_buff *skb)
+ static void kauditd_printk_skb(struct sk_buff *skb)
   {
         struct nlmsghdr *nlh = nlmsg_hdr(skb);
         char *data = nlmsg_data(nlh);
@@@ -398,58 -385,123 +385,123 @@@
                 else
                         audit_log_lost("printk limit exceeded");
         }
+ }
+ 
+ /**
+  * kauditd_hold_skb - Queue an audit record, waiting for auditd
+  * @skb: audit record
+  *
+  * Description:
+  * Queue the audit record, waiting for an instance of auditd.  When this
+  * function is called we haven't given up yet on sending the record, but things
+  * are not looking good.  The first thing we want to do is try to write the
+  * record via printk and then see if we want to try and hold on to the record
+  * and queue it, if we have room.  If we want to hold on to the record, but we
+  * don't have room, record a record lost message.
+  */
+ static void kauditd_hold_skb(struct sk_buff *skb)
+ {
+       /* at this point it is uncertain if we will ever send this to auditd so
+        * try to send the message via printk before we go any further */
+       kauditd_printk_skb(skb);
+ 
+       /* can we just silently drop the message? */
+       if (!audit_default) {
+               kfree_skb(skb);
+               return;
+       }
+ 
+       /* if we have room, queue the message */
+       if (!audit_backlog_limit ||
+           skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
+               skb_queue_tail(&audit_hold_queue, skb);
+               return;
+       }
   
-       audit_hold_skb(skb);
+       /* we have no other options - drop the message */
+       audit_log_lost("kauditd hold queue overflow");
+       kfree_skb(skb);
   }
   
- static void kauditd_send_skb(struct sk_buff *skb)
+ /**
+  * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
+  * @skb: audit record
+  *
+  * Description:
+  * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
+  * but for some reason we are having problems sending it audit records so
+  * queue the given record and attempt to resend.
+  */
+ static void kauditd_retry_skb(struct sk_buff *skb)
   {
-       int err;
-       int attempts = 0;
- #define AUDITD_RETRIES 5
+       /* NOTE: because records should only live in the retry queue for a
+        * short period of time, before either being sent or moved to the hold
+        * queue, we don't currently enforce a limit on this queue */
+       skb_queue_tail(&audit_retry_queue, skb);
+ }
+ 
+ /**
+  * auditd_reset - Disconnect the auditd connection
+  *
+  * Description:
+  * Break the auditd/kauditd connection and move all the records in the retry
+  * queue into the hold queue in case auditd reconnects.  The audit_cmd_mutex
+  * must be held when calling this function.
+  */
+ static void auditd_reset(void)
+ {
+       struct sk_buff *skb;
+ 
+       /* break the connection */
+       if (audit_sock) {
+               sock_put(audit_sock);
+               audit_sock = NULL;
+       }
+       audit_pid = 0;
+       audit_nlk_portid = 0;
+ 
+       /* flush all of the retry queue to the hold queue */
+       while ((skb = skb_dequeue(&audit_retry_queue)))
+               kauditd_hold_skb(skb);
+ }
+ 
+ /**
+  * kauditd_send_unicast_skb - Send a record via unicast to auditd
+  * @skb: audit record
+  */
+ static int kauditd_send_unicast_skb(struct sk_buff *skb)
+ {
+       int rc;
   
- restart:
-       /* take a reference in case we can't send it and we want to hold it */
+       /* if we know nothing is connected, don't even try the netlink call */
+       if (!audit_pid)
+               return -ECONNREFUSED;
+ 
+       /* get an extra skb reference in case we fail to send */
         skb_get(skb);
-       err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
-       if (err < 0) {
-               pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
-                      audit_pid, err);
-               if (audit_pid) {
-                       if (err == -ECONNREFUSED || err == -EPERM
-                           || ++attempts >= AUDITD_RETRIES) {
-                               char s[32];
- 
-                               snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
-                               audit_log_lost(s);
-                               audit_pid = 0;
-                               audit_sock = NULL;
-                       } else {
-                               pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
-                                       attempts, audit_pid);
-                               set_current_state(TASK_INTERRUPTIBLE);
-                               schedule();
-                               goto restart;
-                       }
-               }
-               /* we might get lucky and get this in the next auditd */
-               audit_hold_skb(skb);
-       } else
-               /* drop the extra reference if sent ok */
+       rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+       if (rc >= 0) {
                 consume_skb(skb);
+               rc = 0;
+       }
+ 
+       return rc;
   }
   
   /*
-  * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+  * kauditd_send_multicast_skb - Send a record to any multicast listeners
+  * @skb: audit record
    *
+  * Description:
    * This function doesn't consume an skb as might be expected since it has to
    * copy it anyways.
    */
- static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
+ static void kauditd_send_multicast_skb(struct sk_buff *skb)
   {
-       struct sk_buff          *copy;
-       struct audit_net        *aunet = net_generic(&init_net, audit_net_id);
-       struct sock             *sock = aunet->nlsk;
+       struct sk_buff *copy;
+       struct audit_net *aunet = net_generic(&init_net, audit_net_id);
+       struct sock *sock = aunet->nlsk;
+       struct nlmsghdr *nlh;
   
         if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
                 return;
@@@ -464,74 -516,161 +516,161 @@@
          * no reason for new multicast clients to continue with this
          * non-compliance.
          */
-       copy = skb_copy(skb, gfp_mask);
+       copy = skb_copy(skb, GFP_KERNEL);
         if (!copy)
                 return;
+       nlh = nlmsg_hdr(copy);
+       nlh->nlmsg_len = skb->len;
   
-       nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
+       nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
   }
   
- /*
-  * flush_hold_queue - empty the hold queue if auditd appears
-  *
-  * If auditd just started, drain the queue of messages already
-  * sent to syslog/printk.  Remember loss here is ok.  We already
-  * called audit_log_lost() if it didn't go out normally.  so the
-  * race between the skb_dequeue and the next check for audit_pid
-  * doesn't matter.
+ /**
+  * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
    *
-  * If you ever find kauditd to be too slow we can get a perf win
-  * by doing our own locking and keeping better track if there
-  * are messages in this queue.  I don't see the need now, but
-  * in 5 years when I want to play with this again I'll see this
-  * note and still have no friggin idea what i'm thinking today.
+  * Description:
+  * This function is for use by the wait_event_freezable() call in
+  * kauditd_thread().
    */
- static void flush_hold_queue(void)
+ static int kauditd_wake_condition(void)
   {
-       struct sk_buff *skb;
- 
-       if (!audit_default || !audit_pid)
-               return;
- 
-       skb = skb_dequeue(&audit_skb_hold_queue);
-       if (likely(!skb))
-               return;
+       static int pid_last = 0;
+       int rc;
+       int pid = audit_pid;
   
-       while (skb && audit_pid) {
-               kauditd_send_skb(skb);
-               skb = skb_dequeue(&audit_skb_hold_queue);
-       }
+       /* wake on new messages or a change in the connected auditd */
+       rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
+       if (rc)
+               pid_last = pid;
   
-       /*
-        * if auditd just disappeared but we
-        * dequeued an skb we need to drop ref
-        */
-       consume_skb(skb);
+       return rc;
   }
   
   static int kauditd_thread(void *dummy)
   {
+       int rc;
+       int auditd = 0;
+       int reschedule = 0;
+       struct sk_buff *skb;
+       struct nlmsghdr *nlh;
+ 
+ #define UNICAST_RETRIES 5
+ #define AUDITD_BAD(x,y) \
+       ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
+ 
+       /* NOTE: we do invalidate the auditd connection flag on any sending
+        * errors, but we only "restore" the connection flag at specific places
+        * in the loop in order to help ensure proper ordering of audit
+        * records */
+ 
         set_freezable();
         while (!kthread_should_stop()) {
-               struct sk_buff *skb;
- 
-               flush_hold_queue();
+               /* NOTE: possible area for future improvement is to look at
+                *       the hold and retry queues, since only this thread
+                *       has access to these queues we might be able to do
+                *       our own queuing and skip some/all of the locking */
+ 
+               /* NOTE: it might be a fun experiment to split the hold and
+                *       retry queue handling to another thread, but the
+                *       synchronization issues and other overhead might kill
+                *       any performance gains */
+ 
+               /* attempt to flush the hold queue */
+               while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
+                       rc = kauditd_send_unicast_skb(skb);
+                       if (rc) {
+                               /* requeue to the same spot */
+                               skb_queue_head(&audit_hold_queue, skb);
+ 
+                               auditd = 0;
+                               if (AUDITD_BAD(rc, reschedule)) {
+                                       mutex_lock(&audit_cmd_mutex);
+                                       auditd_reset();
+                                       mutex_unlock(&audit_cmd_mutex);
+                                       reschedule = 0;
+                               }
+                       } else
+                               /* we were able to send successfully */
+                               reschedule = 0;
+               }
   
-               skb = skb_dequeue(&audit_skb_queue);
+               /* attempt to flush the retry queue */
+               while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
+                       rc = kauditd_send_unicast_skb(skb);
+                       if (rc) {
+                               auditd = 0;
+                               if (AUDITD_BAD(rc, reschedule)) {
+                                       kauditd_hold_skb(skb);
+                                       mutex_lock(&audit_cmd_mutex);
+                                       auditd_reset();
+                                       mutex_unlock(&audit_cmd_mutex);
+                                       reschedule = 0;
+                               } else
+                                       /* temporary problem (we hope), queue
+                                        * to the same spot and retry */
+                                       skb_queue_head(&audit_retry_queue, skb);
+                       } else
+                               /* we were able to send successfully */
+                               reschedule = 0;
+               }
   
+               /* standard queue processing, try to be as quick as possible */
+ quick_loop:
+               skb = skb_dequeue(&audit_queue);
                 if (skb) {
-                       if (!audit_backlog_limit ||
-                           (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit))
-                               wake_up(&audit_backlog_wait);
-                       if (audit_pid)
-                               kauditd_send_skb(skb);
+                       /* setup the netlink header, see the comments in
+                        * kauditd_send_multicast_skb() for length quirks */
+                       nlh = nlmsg_hdr(skb);
+                       nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+ 
+                       /* attempt to send to any multicast listeners */
+                       kauditd_send_multicast_skb(skb);
+ 
+                       /* attempt to send to auditd, queue on failure */
+                       if (auditd) {
+                               rc = kauditd_send_unicast_skb(skb);
+                               if (rc) {
+                                       auditd = 0;
+                                       if (AUDITD_BAD(rc, reschedule)) {
+                                               mutex_lock(&audit_cmd_mutex);
+                                               auditd_reset();
+                                               mutex_unlock(&audit_cmd_mutex);
+                                               reschedule = 0;
+                                       }
+ 
+                                       /* move to the retry queue */
+                                       kauditd_retry_skb(skb);
+                               } else
+                                       /* everything is working so go fast! */
+                                       goto quick_loop;
+                       } else if (reschedule)
+                               /* we are currently having problems, move to
+                                * the retry queue */
+                               kauditd_retry_skb(skb);
                         else
-                               audit_printk_skb(skb);
-                       continue;
-               }
+                               /* dump the message via printk and hold it */
+                               kauditd_hold_skb(skb);
+               } else {
+                       /* we have flushed the backlog so wake everyone */
+                       wake_up(&audit_backlog_wait);
+ 
+                       /* if everything is okay with auditd (if present), go
+                        * to sleep until there is something new in the queue
+                        * or we have a change in the connected auditd;
+                        * otherwise simply reschedule to give things a chance
+                        * to recover */
+                       if (reschedule) {
+                               set_current_state(TASK_INTERRUPTIBLE);
+                               schedule();
+                       } else
+                               wait_event_freezable(kauditd_wait,
+                                                    kauditd_wake_condition());
   
-               wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue));
+                       /* update the auditd connection status */
+                       auditd = (audit_pid ? 1 : 0);
+               }
         }
+ 
         return 0;
   }
   
@@@ -596,6 -735,7 +735,7 @@@ static int audit_send_reply_thread(voi
         kfree(reply);
         return 0;
   }
+ 
   /**
    * audit_send_reply - send an audit reply message via netlink
    * @request_skb: skb of request we are replying to (used to target the reply)
@@@ -832,16 -972,6 +972,6 @@@ static int audit_receive_msg(struct sk_
         if (err)
                 return err;
   
-       /* As soon as there's any sign of userspace auditd,
-        * start kauditd to talk to it */
-       if (!kauditd_task) {
-               kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
-               if (IS_ERR(kauditd_task)) {
-                       err = PTR_ERR(kauditd_task);
-                       kauditd_task = NULL;
-                       return err;
-               }
-       }
         seq  = nlh->nlmsg_seq;
         data = nlmsg_data(nlh);
   
@@@ -855,9 -985,9 +985,9 @@@
                 s.rate_limit            = audit_rate_limit;
                 s.backlog_limit         = audit_backlog_limit;
                 s.lost                  = atomic_read(&audit_lost);
-               s.backlog               = skb_queue_len(&audit_skb_queue);
+               s.backlog               = skb_queue_len(&audit_queue);
                 s.feature_bitmap        = AUDIT_FEATURE_BITMAP_ALL;
-               s.backlog_wait_time     = audit_backlog_wait_time_master;
+               s.backlog_wait_time     = audit_backlog_wait_time;
                 audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
                 break;
         }
@@@ -877,12 -1007,6 +1007,12 @@@
                                 return err;
                 }
                 if (s.mask & AUDIT_STATUS_PID) {
+ +                      /* NOTE: we are using task_tgid_vnr() below because
+ +                       *       the s.pid value is relative to the namespace
+ +                       *       of the caller; at present this doesn't matter
+ +                       *       much since you can really only run auditd
+ +                       *       from the initial pid namespace, but something
+ +                       *       to keep in mind if this changes */
                         int new_pid = s.pid;
                         pid_t requesting_pid = task_tgid_vnr(current);
   
@@@ -897,9 -1021,17 +1027,17 @@@
                         }
                         if (audit_enabled != AUDIT_OFF)
                                 audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
-                       audit_pid = new_pid;
-                       audit_nlk_portid = NETLINK_CB(skb).portid;
-                       audit_sock = skb->sk;
+                       if (new_pid) {
+                               if (audit_sock)
+                                       sock_put(audit_sock);
+                               audit_pid = new_pid;
+                               audit_nlk_portid = NETLINK_CB(skb).portid;
+                               sock_hold(skb->sk);
+                               audit_sock = skb->sk;
+                       } else {
+                               auditd_reset();
+                       }
+                       wake_up_interruptible(&kauditd_wait);
                 }
                 if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
                         err = audit_set_rate_limit(s.rate_limit);
@@@ -1167,13 -1299,14 +1305,13 @@@ static void __net_exit audit_net_exit(s
   {
         struct audit_net *aunet = net_generic(net, audit_net_id);
         struct sock *sock = aunet->nlsk;
-       if (sock == audit_sock) {
-               audit_pid = 0;
-               audit_sock = NULL;
-       }
+       mutex_lock(&audit_cmd_mutex);
+       if (sock == audit_sock)
+               auditd_reset();
+       mutex_unlock(&audit_cmd_mutex);
   
- -      RCU_INIT_POINTER(aunet->nlsk, NULL);
- -      synchronize_net();
         netlink_kernel_release(sock);
+ +      aunet->nlsk = NULL;
   }
   
   static struct pernet_operations audit_net_ops __net_initdata = {
@@@ -1195,17 -1328,24 +1333,24 @@@ static int __init audit_init(void
                 audit_default ? "enabled" : "disabled");
         register_pernet_subsys(&audit_net_ops);
   
-       skb_queue_head_init(&audit_skb_queue);
-       skb_queue_head_init(&audit_skb_hold_queue);
+       skb_queue_head_init(&audit_queue);
+       skb_queue_head_init(&audit_retry_queue);
+       skb_queue_head_init(&audit_hold_queue);
         audit_initialized = AUDIT_INITIALIZED;
         audit_enabled = audit_default;
         audit_ever_enabled |= !!audit_default;
   
-       audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
- 
         for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
                 INIT_LIST_HEAD(&audit_inode_hash[i]);
   
+       kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
+       if (IS_ERR(kauditd_task)) {
+               int err = PTR_ERR(kauditd_task);
+               panic("audit: failed to start the kauditd thread (%d)\n", err);
+       }
+ 
+       audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
+ 
         return 0;
   }
   __initcall(audit_init);
@@@ -1338,24 -1478,6 +1483,6 @@@ static inline void audit_get_stamp(stru
         }
   }
   
- /*
-  * Wait for auditd to drain the queue a little
-  */
- static long wait_for_auditd(long sleep_time)
- {
-       DECLARE_WAITQUEUE(wait, current);
- 
-       if (audit_backlog_limit &&
-           skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
-               add_wait_queue_exclusive(&audit_backlog_wait, &wait);
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               sleep_time = schedule_timeout(sleep_time);
-               remove_wait_queue(&audit_backlog_wait, &wait);
-       }
- 
-       return sleep_time;
- }
- 
   /**
    * audit_log_start - obtain an audit buffer
    * @ctx: audit_context (may be NULL)
@@@ -1374,12 -1496,9 +1501,9 @@@
   struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
                                      int type)
   {
-       struct audit_buffer     *ab     = NULL;
-       struct timespec         t;
-       unsigned int            uninitialized_var(serial);
-       int reserve = 5; /* Allow atomic callers to go up to five
-                           entries over the normal backlog limit */
-       unsigned long timeout_start = jiffies;
+       struct audit_buffer *ab;
+       struct timespec t;
+       unsigned int uninitialized_var(serial);
   
         if (audit_initialized != AUDIT_INITIALIZED)
                 return NULL;
@@@ -1387,38 -1506,48 +1511,48 @@@
         if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
                 return NULL;
   
-       if (gfp_mask & __GFP_DIRECT_RECLAIM) {
-               if (audit_pid && audit_pid == current->tgid)
-                       gfp_mask &= ~__GFP_DIRECT_RECLAIM;
-               else
-                       reserve = 0;
-       }
- 
-       while (audit_backlog_limit
-              && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-               if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
-                       long sleep_time;
+       /* don't ever fail/sleep on these two conditions:
+        * 1. auditd generated record - since we need auditd to drain the
+        *    queue; also, when we are checking for auditd, compare PIDs using
+        *    task_tgid_vnr() since audit_pid is set in audit_receive_msg()
+        *    using a PID anchored in the caller's namespace
+        * 2. audit command message - record types 1000 through 1099 inclusive
+        *    are command messages/records used to manage the kernel subsystem
+        *    and the audit userspace, blocking on these messages could cause
+        *    problems under load so don't do it (note: not all of these
+        *    command types are valid as record types, but it is quicker to
+        *    just check two ints than a series of ints in an if/switch stmt) */
+       if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
+             (type >= 1000 && type <= 1099))) {
+               long sleep_time = audit_backlog_wait_time;
+ 
+               while (audit_backlog_limit &&
+                      (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
+                       /* wake kauditd to try and flush the queue */
+                       wake_up_interruptible(&kauditd_wait);
   
-                       sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
-                       if (sleep_time > 0) {
-                               sleep_time = wait_for_auditd(sleep_time);
-                               if (sleep_time > 0)
-                                       continue;
+                       /* sleep if we are allowed and we haven't exhausted our
+                        * backlog wait limit */
+                       if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
+                           (sleep_time > 0)) {
+                               DECLARE_WAITQUEUE(wait, current);
+ 
+                               add_wait_queue_exclusive(&audit_backlog_wait,
+                                                        &wait);
+                               set_current_state(TASK_UNINTERRUPTIBLE);
+                               sleep_time = schedule_timeout(sleep_time);
+                               remove_wait_queue(&audit_backlog_wait, &wait);
+                       } else {
+                               if (audit_rate_check() && printk_ratelimit())
+                                       pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
+                                               skb_queue_len(&audit_queue),
+                                               audit_backlog_limit);
+                               audit_log_lost("backlog limit exceeded");
+                               return NULL;
                         }
                 }
-               if (audit_rate_check() && printk_ratelimit())
-                       pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
-                               skb_queue_len(&audit_skb_queue),
-                               audit_backlog_limit);
-               audit_log_lost("backlog limit exceeded");
-               audit_backlog_wait_time = 0;
-               wake_up(&audit_backlog_wait);
-               return NULL;
         }
   
-       if (!reserve && !audit_backlog_wait_time)
-               audit_backlog_wait_time = audit_backlog_wait_time_master;
- 
         ab = audit_buffer_alloc(ctx, gfp_mask, type);
         if (!ab) {
                 audit_log_lost("out of memory in audit_log_start");
@@@ -1426,9 -1555,9 +1560,9 @@@
         }
   
         audit_get_stamp(ab->ctx, &t, &serial);
- 
         audit_log_format(ab, "audit(%lu.%03lu:%u): ",
                          t.tv_sec, t.tv_nsec/1000000, serial);
+ 
         return ab;
   }
   
@@@ -1922,7 -2051,7 +2056,7 @@@ void audit_log_task_info(struct audit_b
                          " euid=%u suid=%u fsuid=%u"
                          " egid=%u sgid=%u fsgid=%u tty=%s ses=%u",
                          task_ppid_nr(tsk),
- -                       task_pid_nr(tsk),
+ +                       task_tgid_nr(tsk),
                          from_kuid(&init_user_ns, audit_get_loginuid(tsk)),
                          from_kuid(&init_user_ns, cred->uid),
                          from_kgid(&init_user_ns, cred->gid),
@@@ -1978,10 -2107,10 +2112,10 @@@ out
    * audit_log_end - end one audit record
    * @ab: the audit_buffer
    *
-  * netlink_unicast() cannot be called inside an irq context because it blocks
-  * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
-  * on a queue and a tasklet is scheduled to remove them from the queue outside
-  * the irq context.  May be called in any context.
+  * We cannot do a netlink send inside an irq context because it blocks (last
+  * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a
+  * queue and the kauditd thread is woken to remove it from the queue outside
+  * the irq context.  May be called in any context.
    */
   void audit_log_end(struct audit_buffer *ab)
   {
@@@ -1990,28 -2119,8 +2124,8 @@@
         if (!audit_rate_check()) {
                 audit_log_lost("rate limit exceeded");
         } else {
-               struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
- 
-               nlh->nlmsg_len = ab->skb->len;
-               kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
- 
-               /*
-                * The original kaudit unicast socket sends up messages with
-                * nlmsg_len set to the payload length rather than the entire
-                * message length.  This breaks the standard set by netlink.
-                * The existing auditd daemon assumes this breakage.  Fixing
-                * this would require co-ordinating a change in the established
-                * protocol between the kaudit kernel subsystem and the auditd
-                * userspace code.
-                */
-               nlh->nlmsg_len -= NLMSG_HDRLEN;
- 
-               if (audit_pid) {
-                       skb_queue_tail(&audit_skb_queue, ab->skb);
-                       wake_up_interruptible(&kauditd_wait);
-               } else {
-                       audit_printk_skb(ab->skb);
-               }
+               skb_queue_tail(&audit_queue, ab->skb);
+               wake_up_interruptible(&kauditd_wait);
                 ab->skb = NULL;
         }
         audit_buffer_free(ab);
diff --combined kernel/auditsc.c

index 2cd5256,f78cb1b..cf1fa43
--- 1/kernel/auditsc.c
--- 2/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@@ -446,6 -446,7 +446,7 @@@ static int audit_filter_rules(struct ta
         const struct cred *cred;
         int i, need_sid = 1;
         u32 sid;
+       unsigned int sessionid;
   
         cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation);
   
@@@ -457,7 -458,7 +458,7 @@@
   
                 switch (f->type) {
                 case AUDIT_PID:
- -                      pid = task_pid_nr(tsk);
+ +                      pid = task_tgid_nr(tsk);
                         result = audit_comparator(pid, f->op, f->val);
                         break;
                 case AUDIT_PPID:
@@@ -508,6 -509,10 +509,10 @@@
                 case AUDIT_FSGID:
                         result = audit_gid_comparator(cred->fsgid, f->op, f->gid);
                         break;
+               case AUDIT_SESSIONID:
+                       sessionid = audit_get_sessionid(current);
+                       result = audit_comparator(sessionid, f->op, f->val);
+                       break;
                 case AUDIT_PERS:
                         result = audit_comparator(tsk->personality, f->op, f->val);
                         break;
@@@ -1000,7 -1005,7 +1005,7 @@@ static void audit_log_execve_info(struc
         long len_rem;
         long len_full;
         long len_buf;
-       long len_abuf;
+       long len_abuf = 0;
         long len_tmp;
         bool require_data;
         bool encode;
@@@ -1993,7 -1998,7 +1998,7 @@@ static void audit_log_set_loginuid(kuid
         loginuid = from_kuid(&init_user_ns, kloginuid),
         tty = audit_get_tty(current);
   
- -      audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
+ +      audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
         audit_log_task_context(ab);
         audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
                          oldloginuid, loginuid, tty ? tty_name(tty) : "(none)",
@@@ -2025,8 -2030,11 +2030,11 @@@ int audit_set_loginuid(kuid_t loginuid
                 goto out;
   
         /* are we setting or clearing? */
-       if (uid_valid(loginuid))
+       if (uid_valid(loginuid)) {
                 sessionid = (unsigned int)atomic_inc_return(&session_id);
+               if (unlikely(sessionid == (unsigned int)-1))
+                       sessionid = (unsigned int)atomic_inc_return(&session_id);
+       }
   
         task->sessionid = sessionid;
         task->loginuid = loginuid;
@@@ -2220,7 -2228,7 +2228,7 @@@ void __audit_ptrace(struct task_struct 
   {
         struct audit_context *context = current->audit_context;
   
- -      context->target_pid = task_pid_nr(t);
+ +      context->target_pid = task_tgid_nr(t);
         context->target_auid = audit_get_loginuid(t);
         context->target_uid = task_uid(t);
         context->target_sessionid = audit_get_sessionid(t);
@@@ -2245,7 -2253,7 +2253,7 @@@ int __audit_signal_info(int sig, struc
   
         if (audit_pid && t->tgid == audit_pid) {
                 if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
- -                      audit_sig_pid = task_pid_nr(tsk);
+ +                      audit_sig_pid = task_tgid_nr(tsk);
                         if (uid_valid(tsk->loginuid))
                                 audit_sig_uid = tsk->loginuid;
                         else
@@@ -2345,7 -2353,7 +2353,7 @@@ int __audit_log_bprm_fcaps(struct linux
   void __audit_log_capset(const struct cred *new, const struct cred *old)
   {
         struct audit_context *context = current->audit_context;
- -      context->capset.pid = task_pid_nr(current);
+ +      context->capset.pid = task_tgid_nr(current);
         context->capset.cap.effective   = new->cap_effective;
         context->capset.cap.inheritable = new->cap_effective;
         context->capset.cap.permitted   = new->cap_permitted;
@@@ -2377,7 -2385,7 +2385,7 @@@ static void audit_log_task(struct audit
                          from_kgid(&init_user_ns, gid),
                          sessionid);
         audit_log_task_context(ab);
- -      audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
+ +      audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current));
         audit_log_untrustedstring(ab, get_task_comm(comm, current));
         audit_log_d_path_exe(ab, current->mm);
   }
diff --combined net/core/net_namespace.c

index 50fdc1b,10608dd..3c4bbec
--- 1/net/core/net_namespace.c
--- 2/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@@ -37,11 -37,6 +37,11 @@@ struct net init_net = 
   };
   EXPORT_SYMBOL(init_net);
   
+ +static bool init_net_initialized;
+ +
+ +#define MIN_PERNET_OPS_ID     \
+ +      ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
+ +
   #define INITIAL_NET_GEN_PTRS  13 /* +1 for len +2 for rcu_head */
   
   static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@@ -49,28 -44,27 +49,28 @@@
   static struct net_generic *net_alloc_generic(void)
   {
         struct net_generic *ng;
- -      size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+ +      unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
   
         ng = kzalloc(generic_size, GFP_KERNEL);
         if (ng)
- -              ng->len = max_gen_ptrs;
+ +              ng->s.len = max_gen_ptrs;
   
         return ng;
   }
   
- -static int net_assign_generic(struct net *net, int id, void *data)
+ +static int net_assign_generic(struct net *net, unsigned int id, void *data)
   {
         struct net_generic *ng, *old_ng;
   
         BUG_ON(!mutex_is_locked(&net_mutex));
- -      BUG_ON(id == 0);
+ +      BUG_ON(id < MIN_PERNET_OPS_ID);
   
         old_ng = rcu_dereference_protected(net->gen,
                                            lockdep_is_held(&net_mutex));
- -      ng = old_ng;
- -      if (old_ng->len >= id)
- -              goto assign;
+ +      if (old_ng->s.len > id) {
+ +              old_ng->ptr[id] = data;
+ +              return 0;
+ +      }
   
         ng = net_alloc_generic();
         if (ng == NULL)
@@@ -87,12 -81,12 +87,12 @@@
          * the old copy for kfree after a grace period.
          */
   
- -      memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
+ +      memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
+ +             (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
+ +      ng->ptr[id] = data;
   
         rcu_assign_pointer(net->gen, ng);
- -      kfree_rcu(old_ng, rcu);
- -assign:
- -      ng->ptr[id - 1] = data;
+ +      kfree_rcu(old_ng, s.rcu);
         return 0;
   }
   
@@@ -126,7 -120,8 +126,7 @@@ out
   static void ops_free(const struct pernet_operations *ops, struct net *net)
   {
         if (ops->id && ops->size) {
- -              int id = *ops->id;
- -              kfree(net_generic(net, id));
+ +              kfree(net_generic(net, *ops->id));
         }
   }
   
@@@ -218,33 -213,29 +218,31 @@@ static void rtnl_net_notifyid(struct ne
    */
   int peernet2id_alloc(struct net *net, struct net *peer)
   {
-       unsigned long flags;
         bool alloc;
         int id;
   
-       spin_lock_irqsave(&net->nsid_lock, flags);
+ +      if (atomic_read(&net->count) == 0)
+ +              return NETNSA_NSID_NOT_ASSIGNED;
+       spin_lock_bh(&net->nsid_lock);
         alloc = atomic_read(&peer->count) == 0 ? false : true;
         id = __peernet2id_alloc(net, peer, &alloc);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
         if (alloc && id >= 0)
                 rtnl_net_notifyid(net, RTM_NEWNSID, id);
         return id;
   }
- -EXPORT_SYMBOL(peernet2id_alloc);
   
   /* This function returns, if assigned, the id of a peer netns. */
   int peernet2id(struct net *net, struct net *peer)
   {
-       unsigned long flags;
         int id;
   
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
         id = __peernet2id(net, peer);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
         return id;
   }
+ +EXPORT_SYMBOL(peernet2id);
   
   /* This function returns true if the peer netns has an id assigned into the
    * current netns.
@@@ -256,18 -247,17 +254,17 @@@ bool peernet_has_id(struct net *net, st
   
   struct net *get_net_ns_by_id(struct net *net, int id)
   {
-       unsigned long flags;
         struct net *peer;
   
         if (id < 0)
                 return NULL;
   
         rcu_read_lock();
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
         peer = idr_find(&net->netns_ids, id);
         if (peer)
                 get_net(peer);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
         rcu_read_unlock();
   
         return peer;
@@@ -317,16 -307,6 +314,16 @@@ out_undo
   
   
   #ifdef CONFIG_NET_NS
+ +static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
+ +{
+ +      return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
+ +}
+ +
+ +static void dec_net_namespaces(struct ucounts *ucounts)
+ +{
+ +      dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
+ +}
+ +
   static struct kmem_cache *net_cachep;
   static struct workqueue_struct *netns_wq;
   
@@@ -368,34 -348,19 +365,34 @@@ void net_drop_ns(void *p
   struct net *copy_net_ns(unsigned long flags,
                         struct user_namespace *user_ns, struct net *old_net)
   {
+ +      struct ucounts *ucounts;
         struct net *net;
         int rv;
   
         if (!(flags & CLONE_NEWNET))
                 return get_net(old_net);
   
+ +      ucounts = inc_net_namespaces(user_ns);
+ +      if (!ucounts)
+ +              return ERR_PTR(-ENOSPC);
+ +
         net = net_alloc();
- -      if (!net)
+ +      if (!net) {
+ +              dec_net_namespaces(ucounts);
                 return ERR_PTR(-ENOMEM);
+ +      }
   
         get_user_ns(user_ns);
   
- -      mutex_lock(&net_mutex);
+ +      rv = mutex_lock_killable(&net_mutex);
+ +      if (rv < 0) {
+ +              net_free(net);
+ +              dec_net_namespaces(ucounts);
+ +              put_user_ns(user_ns);
+ +              return ERR_PTR(rv);
+ +      }
+ +
+ +      net->ucounts = ucounts;
         rv = setup_net(net, user_ns);
         if (rv == 0) {
                 rtnl_lock();
@@@ -404,7 -369,6 +401,7 @@@
         }
         mutex_unlock(&net_mutex);
         if (rv < 0) {
+ +              dec_net_namespaces(ucounts);
                 put_user_ns(user_ns);
                 net_drop_ns(net);
                 return ERR_PTR(rv);
@@@ -437,17 -401,17 +434,17 @@@ static void cleanup_net(struct work_str
                 for_each_net(tmp) {
                         int id;
   
-                       spin_lock_irq(&tmp->nsid_lock);
+                       spin_lock_bh(&tmp->nsid_lock);
                         id = __peernet2id(tmp, net);
                         if (id >= 0)
                                 idr_remove(&tmp->netns_ids, id);
-                       spin_unlock_irq(&tmp->nsid_lock);
+                       spin_unlock_bh(&tmp->nsid_lock);
                         if (id >= 0)
                                 rtnl_net_notifyid(tmp, RTM_DELNSID, id);
                 }
-               spin_lock_irq(&net->nsid_lock);
+               spin_lock_bh(&net->nsid_lock);
                 idr_destroy(&net->netns_ids);
-               spin_unlock_irq(&net->nsid_lock);
+               spin_unlock_bh(&net->nsid_lock);
   
         }
         rtnl_unlock();
@@@ -477,7 -441,6 +474,7 @@@
         /* Finally it is safe to free my network namespace structure */
         list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
                 list_del_init(&net->exit_list);
+ +              dec_net_namespaces(net->ucounts);
                 put_user_ns(net->user_ns);
                 net_drop_ns(net);
         }
@@@ -565,7 -528,7 +562,7 @@@ static struct pernet_operations __net_i
         .exit = net_ns_net_exit,
   };
   
- -static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+ +static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
         [NETNSA_NONE]           = { .type = NLA_UNSPEC },
         [NETNSA_NSID]           = { .type = NLA_S32 },
         [NETNSA_PID]            = { .type = NLA_U32 },
@@@ -576,7 -539,6 +573,6 @@@ static int rtnl_net_newid(struct sk_buf
   {
         struct net *net = sock_net(skb->sk);
         struct nlattr *tb[NETNSA_MAX + 1];
-       unsigned long flags;
         struct net *peer;
         int nsid, err;
   
@@@ -597,15 -559,15 +593,15 @@@
         if (IS_ERR(peer))
                 return PTR_ERR(peer);
   
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
         if (__peernet2id(net, peer) >= 0) {
-               spin_unlock_irqrestore(&net->nsid_lock, flags);
+               spin_unlock_bh(&net->nsid_lock);
                 err = -EEXIST;
                 goto out;
         }
   
         err = alloc_netid(net, peer, nsid);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
         if (err >= 0) {
                 rtnl_net_notifyid(net, RTM_NEWNSID, err);
                 err = 0;
@@@ -727,11 -689,10 +723,10 @@@ static int rtnl_net_dumpid(struct sk_bu
                 .idx = 0,
                 .s_idx = cb->args[0],
         };
-       unsigned long flags;
   
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
         idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
   
         cb->args[0] = net_cb.idx;
         return skb->len;
@@@ -784,8 -745,6 +779,8 @@@ static int __init net_ns_init(void
         if (setup_net(&init_net, &init_user_ns))
                 panic("Could not setup the initial network namespace");
   
+ +      init_net_initialized = true;
+ +
         rtnl_lock();
         list_add_tail_rcu(&init_net.list, &net_namespace_list);
         rtnl_unlock();
@@@ -847,24 -806,15 +842,24 @@@ static void __unregister_pernet_operati
   static int __register_pernet_operations(struct list_head *list,
                                         struct pernet_operations *ops)
   {
+ +      if (!init_net_initialized) {
+ +              list_add_tail(&ops->list, list);
+ +              return 0;
+ +      }
+ +
         return ops_init(ops, &init_net);
   }
   
   static void __unregister_pernet_operations(struct pernet_operations *ops)
   {
- -      LIST_HEAD(net_exit_list);
- -      list_add(&init_net.exit_list, &net_exit_list);
- -      ops_exit_list(ops, &net_exit_list);
- -      ops_free_list(ops, &net_exit_list);
+ +      if (!init_net_initialized) {
+ +              list_del(&ops->list);
+ +      } else {
+ +              LIST_HEAD(net_exit_list);
+ +              list_add(&init_net.exit_list, &net_exit_list);
+ +              ops_exit_list(ops, &net_exit_list);
+ +              ops_free_list(ops, &net_exit_list);
+ +      }
   }
   
   #endif /* CONFIG_NET_NS */
@@@ -878,7 -828,7 +873,7 @@@ static int register_pernet_operations(s
   
         if (ops->id) {
   again:
- -              error = ida_get_new_above(&net_generic_ids, 1, ops->id);
+ +              error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id);
                 if (error < 0) {
                         if (error == -EAGAIN) {
                                 ida_pre_get(&net_generic_ids, GFP_KERNEL);
@@@ -886,7 -836,7 +881,7 @@@
                         }
                         return error;
                 }
- -              max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
+ +              max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
         }
         error = __register_pernet_operations(list, ops);
         if (error) {
@@@ -1041,17 -991,11 +1036,17 @@@ static int netns_install(struct nsprox
         return 0;
   }
   
+ +static struct user_namespace *netns_owner(struct ns_common *ns)
+ +{
+ +      return to_net_ns(ns)->user_ns;
+ +}
+ +
   const struct proc_ns_operations netns_operations = {
         .name           = "net",
         .type           = CLONE_NEWNET,
         .get            = netns_get,
         .put            = netns_put,
         .install        = netns_install,
+ +      .owner          = netns_owner,
   };
   #endif
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 14 Dec 2016 22:06:40 +0000 (14:06 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 14 Dec 2016 22:06:40 +0000 (14:06 -0800)
		1	2
fs/proc/base.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/uapi/linux/audit.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/audit.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/auditsc.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/net_namespace.c	patch \|	diff1 \|	diff2 \|	blob \| history