* in /proc for a task before it execs a suid executable.
*/
+static u8 nlink_tid;
+static u8 nlink_tgid;
+
struct pid_entry {
const char *name;
- int len;
+ unsigned int len;
umode_t mode;
const struct inode_operations *iop;
const struct file_operations *fop;
* Count the number of hardlinks for the pid_entry table, excluding the .
* and .. links.
*/
-static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
+static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
unsigned int n)
{
unsigned int i;
unsigned int count;
- count = 0;
+ count = 2;
for (i = 0; i < n; ++i) {
if (S_ISDIR(entries[i].mode))
++count;
.llseek = generic_file_llseek,
};
-static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *task)
-{
- struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
- if (mm && !IS_ERR(mm)) {
- unsigned int nwords = 0;
- do {
- nwords += 2;
- } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
- seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
- mmput(mm);
- return 0;
- } else
- return PTR_ERR(mm);
-}
-
-
#ifdef CONFIG_KALLSYMS
/*
* Provides a wchan file via kallsyms in a proper one-value-per-file format.
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
- seq_printf(m, "[<%pK>] %pS\n",
+ seq_printf(m, "[<%pK>] %pB\n",
(void *)entries[i], (void *)entries[i]);
}
unlock_trace(task);
if (attr->ia_valid & ATTR_MODE)
return -EPERM;
- error = inode_change_ok(inode, attr);
+ error = setattr_prepare(dentry, attr);
if (error)
return error;
unsigned long addr = *ppos;
ssize_t copied;
char *page;
+ unsigned int flags;
if (!mm)
return 0;
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
+ /* Maybe we should limit FOLL_FORCE to actual ptrace users? */
+ flags = FOLL_FORCE;
+ if (write)
+ flags |= FOLL_WRITE;
+
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
break;
}
- this_len = access_remote_vm(mm, addr, page, this_len, write);
+ this_len = access_remote_vm(mm, addr, page, this_len, flags);
if (!this_len) {
if (!copied)
copied = -EIO;
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src),
- page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
if (retval <= 0) {
ret = retval;
.release = mem_release,
};
+static int auxv_open(struct inode *inode, struct file *file)
+{
+ return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+}
+
+static ssize_t auxv_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct mm_struct *mm = file->private_data;
+ unsigned int nwords = 0;
+
+ if (!mm)
+ return 0;
+ do {
+ nwords += 2;
+ } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
+ return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
+ nwords * sizeof(mm->saved_auxv[0]));
+}
+
+static const struct file_operations proc_auxv_operations = {
+ .open = auxv_open,
+ .read = auxv_read,
+ .llseek = generic_file_llseek,
+ .release = mem_release,
+};
+
static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
};
#ifdef CONFIG_AUDITSYSCALL
- #define TMPBUFLEN 21
+ #define TMPBUFLEN 11
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
/* building an inode */
-struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
+struct inode *proc_pid_make_inode(struct super_block * sb,
+ struct task_struct *task, umode_t mode)
{
struct inode * inode;
struct proc_inode *ei;
/* Common stuff */
ei = PROC_I(inode);
+ inode->i_mode = mode;
inode->i_ino = get_next_ino();
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_op = &proc_def_inode_operations;
/*
struct map_files_info {
fmode_t mode;
- unsigned long len;
+ unsigned int len;
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
struct proc_inode *ei;
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task);
+ inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK |
+ ((mode & FMODE_READ ) ? S_IRUSR : 0) |
+ ((mode & FMODE_WRITE) ? S_IWUSR : 0));
if (!inode)
return -ENOENT;
inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64;
- inode->i_mode = S_IFLNK;
-
- if (mode & FMODE_READ)
- inode->i_mode |= S_IRUSR;
- if (mode & FMODE_WRITE)
- inode->i_mode |= S_IWUSR;
d_set_d_op(dentry, &tid_map_files_dentry_operations);
d_add(dentry, inode);
if (!p)
return -ESRCH;
- if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
- task_lock(p);
- if (slack_ns == 0)
- p->timer_slack_ns = p->default_timer_slack_ns;
- else
- p->timer_slack_ns = slack_ns;
- task_unlock(p);
- } else
- count = -EPERM;
+ if (p != current) {
+ if (!capable(CAP_SYS_NICE)) {
+ count = -EPERM;
+ goto out;
+ }
+ err = security_task_setscheduler(p);
+ if (err) {
+ count = err;
+ goto out;
+ }
+ }
+
+ task_lock(p);
+ if (slack_ns == 0)
+ p->timer_slack_ns = p->default_timer_slack_ns;
+ else
+ p->timer_slack_ns = slack_ns;
+ task_unlock(p);
+
+out:
put_task_struct(p);
return count;
{
struct inode *inode = m->private;
struct task_struct *p;
- int err = 0;
+ int err = 0;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
- if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
- task_lock(p);
- seq_printf(m, "%llu\n", p->timer_slack_ns);
- task_unlock(p);
- } else
- err = -EPERM;
+ if (p != current) {
+
+ if (!capable(CAP_SYS_NICE)) {
+ err = -EPERM;
+ goto out;
+ }
+ err = security_task_getscheduler(p);
+ if (err)
+ goto out;
+ }
+
+ task_lock(p);
+ seq_printf(m, "%llu\n", p->timer_slack_ns);
+ task_unlock(p);
+out:
put_task_struct(p);
return err;
struct inode *inode;
struct proc_inode *ei;
- inode = proc_pid_make_inode(dir->i_sb, task);
+ inode = proc_pid_make_inode(dir->i_sb, task, p->mode);
if (!inode)
goto out;
ei = PROC_I(inode);
- inode->i_mode = p->mode;
if (S_ISDIR(inode->i_mode))
set_nlink(inode, 2); /* Use getattr to fix if necessary */
if (p->iop)
* Yes, it does not scale. And it should not. Don't add
* new entries into /proc/<tgid>/ without very good reasons.
*/
- last = &ents[nents - 1];
- for (p = ents; p <= last; p++) {
+ last = &ents[nents];
+ for (p = ents; p < last; p++) {
if (p->len != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, p->name, p->len))
break;
}
- if (p > last)
+ if (p >= last)
goto out;
error = proc_pident_instantiate(dir, dentry, task, p);
if (ctx->pos >= nents + 2)
goto out;
- for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) {
+ for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
if (!proc_fill_cache(file, ctx, p->name, p->len,
proc_pident_instantiate, task, p))
break;
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
- ONE("auxv", S_IRUSR, proc_pid_auxv),
+ REG("auxv", S_IRUSR, proc_auxv_operations),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("limits", S_IRUGO, proc_pid_limits),
{
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task);
+ inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode)
goto out;
- inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
inode->i_op = &proc_tgid_base_inode_operations;
inode->i_fop = &proc_tgid_base_operations;
inode->i_flags|=S_IMMUTABLE;
- set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
- ARRAY_SIZE(tgid_base_stuff)));
+ set_nlink(inode, nlink_tgid);
d_set_d_op(dentry, &pid_dentry_operations);
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
- ONE("auxv", S_IRUSR, proc_pid_auxv),
+ REG("auxv", S_IRUSR, proc_auxv_operations),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("limits", S_IRUGO, proc_pid_limits),
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task);
+ inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode)
goto out;
- inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
inode->i_op = &proc_tid_base_inode_operations;
inode->i_fop = &proc_tid_base_operations;
inode->i_flags|=S_IMMUTABLE;
- set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
- ARRAY_SIZE(tid_base_stuff)));
+ set_nlink(inode, nlink_tid);
d_set_d_op(dentry, &pid_dentry_operations);
.iterate_shared = proc_task_readdir,
.llseek = generic_file_llseek,
};
+
+void __init set_proc_pid_nlink(void)
+{
+ nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
+ nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
+}
#define AUDIT_OBJ_LEV_LOW 22
#define AUDIT_OBJ_LEV_HIGH 23
#define AUDIT_LOGINUID_SET 24
+ #define AUDIT_SESSIONID 25 /* Session ID */
/* These are ONLY useful when checking
* at syscall exit time (AUDIT_AT_EXIT). */
#define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001
#define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002
#define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH 0x00000004
+#define AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND 0x00000008
+ #define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010
#define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH | \
- AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND)
++ AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
+ AUDIT_FEATURE_BITMAP_SESSIONID_FILTER)
/* deprecated: AUDIT_VERSION_* */
#define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL
* When set to zero, this means unlimited. */
static u32 audit_backlog_limit = 64;
#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
- static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
/* The identity of the user shutting down the audit system. */
/* The netlink socket. */
static struct sock *audit_sock;
-static int audit_net_id;
+static unsigned int audit_net_id;
/* Hash for inode-based rules */
struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
static int audit_freelist_count;
static LIST_HEAD(audit_freelist);
- static struct sk_buff_head audit_skb_queue;
- /* queue of skbs to send to auditd when/if it comes back */
- static struct sk_buff_head audit_skb_hold_queue;
+ /* queue msgs to send via kauditd_task */
+ static struct sk_buff_head audit_queue;
+ /* queue msgs due to temporary unicast send problems */
+ static struct sk_buff_head audit_retry_queue;
+ /* queue msgs waiting for new auditd connection */
+ static struct sk_buff_head audit_hold_queue;
+
+ /* queue servicing thread */
static struct task_struct *kauditd_task;
static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
+
+ /* waitqueue for callers who are blocked on the audit backlog */
static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
static int audit_set_backlog_wait_time(u32 timeout)
{
return audit_do_config_change("audit_backlog_wait_time",
- &audit_backlog_wait_time_master, timeout);
+ &audit_backlog_wait_time, timeout);
}
static int audit_set_enabled(u32 state)
}
/*
- * Queue skbs to be sent to auditd when/if it comes back. These skbs should
- * already have been sent via prink/syslog and so if these messages are dropped
- * it is not a huge concern since we already passed the audit_log_lost()
- * notification and stuff. This is just nice to get audit messages during
- * boot before auditd is running or messages generated while auditd is stopped.
- * This only holds messages is audit_default is set, aka booting with audit=1
- * or building your kernel that way.
- */
- static void audit_hold_skb(struct sk_buff *skb)
- {
- if (audit_default &&
- (!audit_backlog_limit ||
- skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit))
- skb_queue_tail(&audit_skb_hold_queue, skb);
- else
- kfree_skb(skb);
- }
-
- /*
* For one reason or another this nlh isn't getting delivered to the userspace
* audit daemon, just send it to printk.
*/
- static void audit_printk_skb(struct sk_buff *skb)
+ static void kauditd_printk_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = nlmsg_data(nlh);
else
audit_log_lost("printk limit exceeded");
}
+ }
+
+ /**
+ * kauditd_hold_skb - Queue an audit record, waiting for auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Queue the audit record, waiting for an instance of auditd. When this
+ * function is called we haven't given up yet on sending the record, but things
+ * are not looking good. The first thing we want to do is try to write the
+ * record via printk and then see if we want to try and hold on to the record
+ * and queue it, if we have room. If we want to hold on to the record, but we
+ * don't have room, record a record lost message.
+ */
+ static void kauditd_hold_skb(struct sk_buff *skb)
+ {
+ /* at this point it is uncertain if we will ever send this to auditd so
+ * try to send the message via printk before we go any further */
+ kauditd_printk_skb(skb);
+
+ /* can we just silently drop the message? */
+ if (!audit_default) {
+ kfree_skb(skb);
+ return;
+ }
+
+ /* if we have room, queue the message */
+ if (!audit_backlog_limit ||
+ skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
+ skb_queue_tail(&audit_hold_queue, skb);
+ return;
+ }
- audit_hold_skb(skb);
+ /* we have no other options - drop the message */
+ audit_log_lost("kauditd hold queue overflow");
+ kfree_skb(skb);
}
- static void kauditd_send_skb(struct sk_buff *skb)
+ /**
+ * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
+ * but for some reason we are having problems sending it audit records so
+ * queue the given record and attempt to resend.
+ */
+ static void kauditd_retry_skb(struct sk_buff *skb)
{
- int err;
- int attempts = 0;
- #define AUDITD_RETRIES 5
+ /* NOTE: because records should only live in the retry queue for a
+ * short period of time, before either being sent or moved to the hold
+ * queue, we don't currently enforce a limit on this queue */
+ skb_queue_tail(&audit_retry_queue, skb);
+ }
+
+ /**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the records in the retry
+ * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex
+ * must be held when calling this function.
+ */
+ static void auditd_reset(void)
+ {
+ struct sk_buff *skb;
+
+ /* break the connection */
+ if (audit_sock) {
+ sock_put(audit_sock);
+ audit_sock = NULL;
+ }
+ audit_pid = 0;
+ audit_nlk_portid = 0;
+
+ /* flush all of the retry queue to the hold queue */
+ while ((skb = skb_dequeue(&audit_retry_queue)))
+ kauditd_hold_skb(skb);
+ }
+
+ /**
+ * kauditd_send_unicast_skb - Send a record via unicast to auditd
+ * @skb: audit record
+ */
+ static int kauditd_send_unicast_skb(struct sk_buff *skb)
+ {
+ int rc;
- restart:
- /* take a reference in case we can't send it and we want to hold it */
+ /* if we know nothing is connected, don't even try the netlink call */
+ if (!audit_pid)
+ return -ECONNREFUSED;
+
+ /* get an extra skb reference in case we fail to send */
skb_get(skb);
- err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
- if (err < 0) {
- pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
- audit_pid, err);
- if (audit_pid) {
- if (err == -ECONNREFUSED || err == -EPERM
- || ++attempts >= AUDITD_RETRIES) {
- char s[32];
-
- snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
- audit_log_lost(s);
- audit_pid = 0;
- audit_sock = NULL;
- } else {
- pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
- attempts, audit_pid);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- goto restart;
- }
- }
- /* we might get lucky and get this in the next auditd */
- audit_hold_skb(skb);
- } else
- /* drop the extra reference if sent ok */
+ rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+ if (rc >= 0) {
consume_skb(skb);
+ rc = 0;
+ }
+
+ return rc;
}
/*
- * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+ * kauditd_send_multicast_skb - Send a record to any multicast listeners
+ * @skb: audit record
*
+ * Description:
* This function doesn't consume an skb as might be expected since it has to
* copy it anyways.
*/
- static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
+ static void kauditd_send_multicast_skb(struct sk_buff *skb)
{
- struct sk_buff *copy;
- struct audit_net *aunet = net_generic(&init_net, audit_net_id);
- struct sock *sock = aunet->nlsk;
+ struct sk_buff *copy;
+ struct audit_net *aunet = net_generic(&init_net, audit_net_id);
+ struct sock *sock = aunet->nlsk;
+ struct nlmsghdr *nlh;
if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
return;
* no reason for new multicast clients to continue with this
* non-compliance.
*/
- copy = skb_copy(skb, gfp_mask);
+ copy = skb_copy(skb, GFP_KERNEL);
if (!copy)
return;
+ nlh = nlmsg_hdr(copy);
+ nlh->nlmsg_len = skb->len;
- nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
+ nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
}
- /*
- * flush_hold_queue - empty the hold queue if auditd appears
- *
- * If auditd just started, drain the queue of messages already
- * sent to syslog/printk. Remember loss here is ok. We already
- * called audit_log_lost() if it didn't go out normally. so the
- * race between the skb_dequeue and the next check for audit_pid
- * doesn't matter.
+ /**
+ * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
*
- * If you ever find kauditd to be too slow we can get a perf win
- * by doing our own locking and keeping better track if there
- * are messages in this queue. I don't see the need now, but
- * in 5 years when I want to play with this again I'll see this
- * note and still have no friggin idea what i'm thinking today.
+ * Description:
+ * This function is for use by the wait_event_freezable() call in
+ * kauditd_thread().
*/
- static void flush_hold_queue(void)
+ static int kauditd_wake_condition(void)
{
- struct sk_buff *skb;
-
- if (!audit_default || !audit_pid)
- return;
-
- skb = skb_dequeue(&audit_skb_hold_queue);
- if (likely(!skb))
- return;
+ static int pid_last = 0;
+ int rc;
+ int pid = audit_pid;
- while (skb && audit_pid) {
- kauditd_send_skb(skb);
- skb = skb_dequeue(&audit_skb_hold_queue);
- }
+ /* wake on new messages or a change in the connected auditd */
+ rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
+ if (rc)
+ pid_last = pid;
- /*
- * if auditd just disappeared but we
- * dequeued an skb we need to drop ref
- */
- consume_skb(skb);
+ return rc;
}
static int kauditd_thread(void *dummy)
{
+ int rc;
+ int auditd = 0;
+ int reschedule = 0;
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+
+ #define UNICAST_RETRIES 5
+ #define AUDITD_BAD(x,y) \
+ ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
+
+ /* NOTE: we do invalidate the auditd connection flag on any sending
+ * errors, but we only "restore" the connection flag at specific places
+ * in the loop in order to help ensure proper ordering of audit
+ * records */
+
set_freezable();
while (!kthread_should_stop()) {
- struct sk_buff *skb;
-
- flush_hold_queue();
+ /* NOTE: possible area for future improvement is to look at
+ * the hold and retry queues, since only this thread
+ * has access to these queues we might be able to do
+ * our own queuing and skip some/all of the locking */
+
+ /* NOTE: it might be a fun experiment to split the hold and
+ * retry queue handling to another thread, but the
+ * synchronization issues and other overhead might kill
+ * any performance gains */
+
+ /* attempt to flush the hold queue */
+ while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ /* requeue to the same spot */
+ skb_queue_head(&audit_hold_queue, skb);
+
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ }
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
- skb = skb_dequeue(&audit_skb_queue);
+ /* attempt to flush the retry queue */
+ while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ kauditd_hold_skb(skb);
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ } else
+ /* temporary problem (we hope), queue
+ * to the same spot and retry */
+ skb_queue_head(&audit_retry_queue, skb);
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
+ /* standard queue processing, try to be as quick as possible */
+ quick_loop:
+ skb = skb_dequeue(&audit_queue);
if (skb) {
- if (!audit_backlog_limit ||
- (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit))
- wake_up(&audit_backlog_wait);
- if (audit_pid)
- kauditd_send_skb(skb);
+ /* setup the netlink header, see the comments in
+ * kauditd_send_multicast_skb() for length quirks */
+ nlh = nlmsg_hdr(skb);
+ nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+
+ /* attempt to send to any multicast listeners */
+ kauditd_send_multicast_skb(skb);
+
+ /* attempt to send to auditd, queue on failure */
+ if (auditd) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ }
+
+ /* move to the retry queue */
+ kauditd_retry_skb(skb);
+ } else
+ /* everything is working so go fast! */
+ goto quick_loop;
+ } else if (reschedule)
+ /* we are currently having problems, move to
+ * the retry queue */
+ kauditd_retry_skb(skb);
else
- audit_printk_skb(skb);
- continue;
- }
+ /* dump the message via printk and hold it */
+ kauditd_hold_skb(skb);
+ } else {
+ /* we have flushed the backlog so wake everyone */
+ wake_up(&audit_backlog_wait);
+
+ /* if everything is okay with auditd (if present), go
+ * to sleep until there is something new in the queue
+ * or we have a change in the connected auditd;
+ * otherwise simply reschedule to give things a chance
+ * to recover */
+ if (reschedule) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ } else
+ wait_event_freezable(kauditd_wait,
+ kauditd_wake_condition());
- wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue));
+ /* update the auditd connection status */
+ auditd = (audit_pid ? 1 : 0);
+ }
}
+
return 0;
}
kfree(reply);
return 0;
}
+
/**
* audit_send_reply - send an audit reply message via netlink
* @request_skb: skb of request we are replying to (used to target the reply)
if (err)
return err;
- /* As soon as there's any sign of userspace auditd,
- * start kauditd to talk to it */
- if (!kauditd_task) {
- kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
- if (IS_ERR(kauditd_task)) {
- err = PTR_ERR(kauditd_task);
- kauditd_task = NULL;
- return err;
- }
- }
seq = nlh->nlmsg_seq;
data = nlmsg_data(nlh);
s.rate_limit = audit_rate_limit;
s.backlog_limit = audit_backlog_limit;
s.lost = atomic_read(&audit_lost);
- s.backlog = skb_queue_len(&audit_skb_queue);
+ s.backlog = skb_queue_len(&audit_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
- s.backlog_wait_time = audit_backlog_wait_time_master;
+ s.backlog_wait_time = audit_backlog_wait_time;
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break;
}
return err;
}
if (s.mask & AUDIT_STATUS_PID) {
+ /* NOTE: we are using task_tgid_vnr() below because
+ * the s.pid value is relative to the namespace
+ * of the caller; at present this doesn't matter
+ * much since you can really only run auditd
+ * from the initial pid namespace, but something
+ * to keep in mind if this changes */
int new_pid = s.pid;
pid_t requesting_pid = task_tgid_vnr(current);
}
if (audit_enabled != AUDIT_OFF)
audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
- audit_pid = new_pid;
- audit_nlk_portid = NETLINK_CB(skb).portid;
- audit_sock = skb->sk;
+ if (new_pid) {
+ if (audit_sock)
+ sock_put(audit_sock);
+ audit_pid = new_pid;
+ audit_nlk_portid = NETLINK_CB(skb).portid;
+ sock_hold(skb->sk);
+ audit_sock = skb->sk;
+ } else {
+ auditd_reset();
+ }
+ wake_up_interruptible(&kauditd_wait);
}
if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
err = audit_set_rate_limit(s.rate_limit);
{
struct audit_net *aunet = net_generic(net, audit_net_id);
struct sock *sock = aunet->nlsk;
- if (sock == audit_sock) {
- audit_pid = 0;
- audit_sock = NULL;
- }
+ mutex_lock(&audit_cmd_mutex);
+ if (sock == audit_sock)
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
- RCU_INIT_POINTER(aunet->nlsk, NULL);
- synchronize_net();
netlink_kernel_release(sock);
+ aunet->nlsk = NULL;
}
static struct pernet_operations audit_net_ops __net_initdata = {
audit_default ? "enabled" : "disabled");
register_pernet_subsys(&audit_net_ops);
- skb_queue_head_init(&audit_skb_queue);
- skb_queue_head_init(&audit_skb_hold_queue);
+ skb_queue_head_init(&audit_queue);
+ skb_queue_head_init(&audit_retry_queue);
+ skb_queue_head_init(&audit_hold_queue);
audit_initialized = AUDIT_INITIALIZED;
audit_enabled = audit_default;
audit_ever_enabled |= !!audit_default;
- audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
-
for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
INIT_LIST_HEAD(&audit_inode_hash[i]);
+ kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
+ if (IS_ERR(kauditd_task)) {
+ int err = PTR_ERR(kauditd_task);
+ panic("audit: failed to start the kauditd thread (%d)\n", err);
+ }
+
+ audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
+
return 0;
}
__initcall(audit_init);
}
}
- /*
- * Wait for auditd to drain the queue a little
- */
- static long wait_for_auditd(long sleep_time)
- {
- DECLARE_WAITQUEUE(wait, current);
-
- if (audit_backlog_limit &&
- skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
- add_wait_queue_exclusive(&audit_backlog_wait, &wait);
- set_current_state(TASK_UNINTERRUPTIBLE);
- sleep_time = schedule_timeout(sleep_time);
- remove_wait_queue(&audit_backlog_wait, &wait);
- }
-
- return sleep_time;
- }
-
/**
* audit_log_start - obtain an audit buffer
* @ctx: audit_context (may be NULL)
struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
int type)
{
- struct audit_buffer *ab = NULL;
- struct timespec t;
- unsigned int uninitialized_var(serial);
- int reserve = 5; /* Allow atomic callers to go up to five
- entries over the normal backlog limit */
- unsigned long timeout_start = jiffies;
+ struct audit_buffer *ab;
+ struct timespec t;
+ unsigned int uninitialized_var(serial);
if (audit_initialized != AUDIT_INITIALIZED)
return NULL;
if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
return NULL;
- if (gfp_mask & __GFP_DIRECT_RECLAIM) {
- if (audit_pid && audit_pid == current->tgid)
- gfp_mask &= ~__GFP_DIRECT_RECLAIM;
- else
- reserve = 0;
- }
-
- while (audit_backlog_limit
- && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
- if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
- long sleep_time;
+ /* don't ever fail/sleep on these two conditions:
+ * 1. auditd generated record - since we need auditd to drain the
+ * queue; also, when we are checking for auditd, compare PIDs using
+ * task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
+ * using a PID anchored in the caller's namespace
+ * 2. audit command message - record types 1000 through 1099 inclusive
+ * are command messages/records used to manage the kernel subsystem
+ * and the audit userspace, blocking on these messages could cause
+ * problems under load so don't do it (note: not all of these
+ * command types are valid as record types, but it is quicker to
+ * just check two ints than a series of ints in a if/switch stmt) */
+ if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
+ (type >= 1000 && type <= 1099))) {
+ long sleep_time = audit_backlog_wait_time;
+
+ while (audit_backlog_limit &&
+ (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
+ /* wake kauditd to try and flush the queue */
+ wake_up_interruptible(&kauditd_wait);
- sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
- if (sleep_time > 0) {
- sleep_time = wait_for_auditd(sleep_time);
- if (sleep_time > 0)
- continue;
+ /* sleep if we are allowed and we haven't exhausted our
+ * backlog wait limit */
+ if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
+ (sleep_time > 0)) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&audit_backlog_wait,
+ &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ sleep_time = schedule_timeout(sleep_time);
+ remove_wait_queue(&audit_backlog_wait, &wait);
+ } else {
+ if (audit_rate_check() && printk_ratelimit())
+ pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
+ skb_queue_len(&audit_queue),
+ audit_backlog_limit);
+ audit_log_lost("backlog limit exceeded");
+ return NULL;
}
}
- if (audit_rate_check() && printk_ratelimit())
- pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
- skb_queue_len(&audit_skb_queue),
- audit_backlog_limit);
- audit_log_lost("backlog limit exceeded");
- audit_backlog_wait_time = 0;
- wake_up(&audit_backlog_wait);
- return NULL;
}
- if (!reserve && !audit_backlog_wait_time)
- audit_backlog_wait_time = audit_backlog_wait_time_master;
-
ab = audit_buffer_alloc(ctx, gfp_mask, type);
if (!ab) {
audit_log_lost("out of memory in audit_log_start");
}
audit_get_stamp(ab->ctx, &t, &serial);
-
audit_log_format(ab, "audit(%lu.%03lu:%u): ",
t.tv_sec, t.tv_nsec/1000000, serial);
+
return ab;
}
" euid=%u suid=%u fsuid=%u"
" egid=%u sgid=%u fsgid=%u tty=%s ses=%u",
task_ppid_nr(tsk),
- task_pid_nr(tsk),
+ task_tgid_nr(tsk),
from_kuid(&init_user_ns, audit_get_loginuid(tsk)),
from_kuid(&init_user_ns, cred->uid),
from_kgid(&init_user_ns, cred->gid),
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
- * netlink_unicast() cannot be called inside an irq context because it blocks
- * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
- * on a queue and a tasklet is scheduled to remove them from the queue outside
- * the irq context. May be called in any context.
+ * We can not do a netlink send inside an irq context because it blocks (last
+ * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a
+ * queue and a tasklet is scheduled to remove them from the queue outside the
+ * irq context. May be called in any context.
*/
void audit_log_end(struct audit_buffer *ab)
{
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
- struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-
- nlh->nlmsg_len = ab->skb->len;
- kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
-
- /*
- * The original kaudit unicast socket sends up messages with
- * nlmsg_len set to the payload length rather than the entire
- * message length. This breaks the standard set by netlink.
- * The existing auditd daemon assumes this breakage. Fixing
- * this would require co-ordinating a change in the established
- * protocol between the kaudit kernel subsystem and the auditd
- * userspace code.
- */
- nlh->nlmsg_len -= NLMSG_HDRLEN;
-
- if (audit_pid) {
- skb_queue_tail(&audit_skb_queue, ab->skb);
- wake_up_interruptible(&kauditd_wait);
- } else {
- audit_printk_skb(ab->skb);
- }
+ skb_queue_tail(&audit_queue, ab->skb);
+ wake_up_interruptible(&kauditd_wait);
ab->skb = NULL;
}
audit_buffer_free(ab);
const struct cred *cred;
int i, need_sid = 1;
u32 sid;
+ unsigned int sessionid;
cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation);
switch (f->type) {
case AUDIT_PID:
- pid = task_pid_nr(tsk);
+ pid = task_tgid_nr(tsk);
result = audit_comparator(pid, f->op, f->val);
break;
case AUDIT_PPID:
case AUDIT_FSGID:
result = audit_gid_comparator(cred->fsgid, f->op, f->gid);
break;
+ case AUDIT_SESSIONID:
+ sessionid = audit_get_sessionid(current);
+ result = audit_comparator(sessionid, f->op, f->val);
+ break;
case AUDIT_PERS:
result = audit_comparator(tsk->personality, f->op, f->val);
break;
long len_rem;
long len_full;
long len_buf;
- long len_abuf;
+ long len_abuf = 0;
long len_tmp;
bool require_data;
bool encode;
loginuid = from_kuid(&init_user_ns, kloginuid),
tty = audit_get_tty(current);
- audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
+ audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
audit_log_task_context(ab);
audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
oldloginuid, loginuid, tty ? tty_name(tty) : "(none)",
goto out;
/* are we setting or clearing? */
- if (uid_valid(loginuid))
+ if (uid_valid(loginuid)) {
sessionid = (unsigned int)atomic_inc_return(&session_id);
+ if (unlikely(sessionid == (unsigned int)-1))
+ sessionid = (unsigned int)atomic_inc_return(&session_id);
+ }
task->sessionid = sessionid;
task->loginuid = loginuid;
{
struct audit_context *context = current->audit_context;
- context->target_pid = task_pid_nr(t);
+ context->target_pid = task_tgid_nr(t);
context->target_auid = audit_get_loginuid(t);
context->target_uid = task_uid(t);
context->target_sessionid = audit_get_sessionid(t);
if (audit_pid && t->tgid == audit_pid) {
if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
- audit_sig_pid = task_pid_nr(tsk);
+ audit_sig_pid = task_tgid_nr(tsk);
if (uid_valid(tsk->loginuid))
audit_sig_uid = tsk->loginuid;
else
void __audit_log_capset(const struct cred *new, const struct cred *old)
{
struct audit_context *context = current->audit_context;
- context->capset.pid = task_pid_nr(current);
+ context->capset.pid = task_tgid_nr(current);
context->capset.cap.effective = new->cap_effective;
context->capset.cap.inheritable = new->cap_effective;
context->capset.cap.permitted = new->cap_permitted;
from_kgid(&init_user_ns, gid),
sessionid);
audit_log_task_context(ab);
- audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
+ audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current));
audit_log_untrustedstring(ab, get_task_comm(comm, current));
audit_log_d_path_exe(ab, current->mm);
}
};
EXPORT_SYMBOL(init_net);
+static bool init_net_initialized;
+
+#define MIN_PERNET_OPS_ID \
+ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
+
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
static struct net_generic *net_alloc_generic(void)
{
struct net_generic *ng;
- size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+ unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
ng = kzalloc(generic_size, GFP_KERNEL);
if (ng)
- ng->len = max_gen_ptrs;
+ ng->s.len = max_gen_ptrs;
return ng;
}
-static int net_assign_generic(struct net *net, int id, void *data)
+static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;
BUG_ON(!mutex_is_locked(&net_mutex));
- BUG_ON(id == 0);
+ BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&net_mutex));
- ng = old_ng;
- if (old_ng->len >= id)
- goto assign;
+ if (old_ng->s.len > id) {
+ old_ng->ptr[id] = data;
+ return 0;
+ }
ng = net_alloc_generic();
if (ng == NULL)
* the old copy for kfree after a grace period.
*/
- memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
+ memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
+ (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
+ ng->ptr[id] = data;
rcu_assign_pointer(net->gen, ng);
- kfree_rcu(old_ng, rcu);
-assign:
- ng->ptr[id - 1] = data;
+ kfree_rcu(old_ng, s.rcu);
return 0;
}
static void ops_free(const struct pernet_operations *ops, struct net *net)
{
if (ops->id && ops->size) {
- int id = *ops->id;
- kfree(net_generic(net, id));
+ kfree(net_generic(net, *ops->id));
}
}
*/
int peernet2id_alloc(struct net *net, struct net *peer)
{
- unsigned long flags;
bool alloc;
int id;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ if (atomic_read(&net->count) == 0)
+ return NETNSA_NSID_NOT_ASSIGNED;
+ spin_lock_bh(&net->nsid_lock);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
}
-EXPORT_SYMBOL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
- unsigned long flags;
int id;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
id = __peernet2id(net, peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
return id;
}
+EXPORT_SYMBOL(peernet2id);
/* This function returns true is the peer netns has an id assigned into the
* current netns.
struct net *get_net_ns_by_id(struct net *net, int id)
{
- unsigned long flags;
struct net *peer;
if (id < 0)
return NULL;
rcu_read_lock();
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
peer = idr_find(&net->netns_ids, id);
if (peer)
get_net(peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
rcu_read_unlock();
return peer;
#ifdef CONFIG_NET_NS
+static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
+{
+ return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
+}
+
+static void dec_net_namespaces(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
+}
+
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;
struct net *copy_net_ns(unsigned long flags,
struct user_namespace *user_ns, struct net *old_net)
{
+ struct ucounts *ucounts;
struct net *net;
int rv;
if (!(flags & CLONE_NEWNET))
return get_net(old_net);
+ ucounts = inc_net_namespaces(user_ns);
+ if (!ucounts)
+ return ERR_PTR(-ENOSPC);
+
net = net_alloc();
- if (!net)
+ if (!net) {
+ dec_net_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
+ }
get_user_ns(user_ns);
- mutex_lock(&net_mutex);
+ rv = mutex_lock_killable(&net_mutex);
+ if (rv < 0) {
+ net_free(net);
+ dec_net_namespaces(ucounts);
+ put_user_ns(user_ns);
+ return ERR_PTR(rv);
+ }
+
+ net->ucounts = ucounts;
rv = setup_net(net, user_ns);
if (rv == 0) {
rtnl_lock();
}
mutex_unlock(&net_mutex);
if (rv < 0) {
+ dec_net_namespaces(ucounts);
put_user_ns(user_ns);
net_drop_ns(net);
return ERR_PTR(rv);
for_each_net(tmp) {
int id;
- spin_lock_irq(&tmp->nsid_lock);
+ spin_lock_bh(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_irq(&tmp->nsid_lock);
+ spin_unlock_bh(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id);
}
- spin_lock_irq(&net->nsid_lock);
+ spin_lock_bh(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_irq(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
}
rtnl_unlock();
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
+ dec_net_namespaces(net->ucounts);
put_user_ns(net->user_ns);
net_drop_ns(net);
}
.exit = net_ns_net_exit,
};
-static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
[NETNSA_NONE] = { .type = NLA_UNSPEC },
[NETNSA_NSID] = { .type = NLA_S32 },
[NETNSA_PID] = { .type = NLA_U32 },
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
- unsigned long flags;
struct net *peer;
int nsid, err;
if (IS_ERR(peer))
return PTR_ERR(peer);
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
err = -EEXIST;
goto out;
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
.idx = 0,
.s_idx = cb->args[0],
};
- unsigned long flags;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
cb->args[0] = net_cb.idx;
return skb->len;
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
+ init_net_initialized = true;
+
rtnl_lock();
list_add_tail_rcu(&init_net.list, &net_namespace_list);
rtnl_unlock();
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
+ if (!init_net_initialized) {
+ list_add_tail(&ops->list, list);
+ return 0;
+ }
+
return ops_init(ops, &init_net);
}
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
- LIST_HEAD(net_exit_list);
- list_add(&init_net.exit_list, &net_exit_list);
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+ if (!init_net_initialized) {
+ list_del(&ops->list);
+ } else {
+ LIST_HEAD(net_exit_list);
+ list_add(&init_net.exit_list, &net_exit_list);
+ ops_exit_list(ops, &net_exit_list);
+ ops_free_list(ops, &net_exit_list);
+ }
}
#endif /* CONFIG_NET_NS */
if (ops->id) {
again:
- error = ida_get_new_above(&net_generic_ids, 1, ops->id);
+ error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id);
if (error < 0) {
if (error == -EAGAIN) {
ida_pre_get(&net_generic_ids, GFP_KERNEL);
}
return error;
}
- max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
+ max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
}
error = __register_pernet_operations(list, ops);
if (error) {
return 0;
}
+static struct user_namespace *netns_owner(struct ns_common *ns)
+{
+ return to_net_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
+ .owner = netns_owner,
};
#endif