exec: separate MM_ANONPAGES and RLIMIT_STACK accounting
author: Oleg Nesterov <oleg@redhat.com>
Thu, 3 Jan 2019 23:28:11 +0000 (15:28 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Jan 2019 21:13:47 +0000 (13:13 -0800)
get_arg_page() checks bprm->rlim_stack.rlim_cur and re-calculates the
"extra" size for argv/envp pointers every time, this is a bit ugly and
even not strictly correct: acct_arg_size() must not account this size.

Remove all the rlimit code in get_arg_page().  Instead, add bprm->argmin
calculated once at the start of __do_execve_file() and change
copy_strings to check bprm->p >= bprm->argmin.

The patch adds the new helper, prepare_arg_pages() which initializes
bprm->argc/envc and bprm->argmin.

[oleg@redhat.com: fix !CONFIG_MMU version of get_arg_page()]
Link: http://lkml.kernel.org/r/20181126122307.GA1660@redhat.com
[akpm@linux-foundation.org: use max_t]
Link: http://lkml.kernel.org/r/20181112160910.GA28440@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/exec.c
include/linux/binfmts.h

index fc281b738a9822a652f7d19bb60ae15acd7a7ebf..ea7d439cf79efe95b9132185ae417b1b9d08efe9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -218,55 +218,10 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
        if (ret <= 0)
                return NULL;
 
-       if (write) {
-               unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
-               unsigned long ptr_size, limit;
-
-               /*
-                * Since the stack will hold pointers to the strings, we
-                * must account for them as well.
-                *
-                * The size calculation is the entire vma while each arg page is
-                * built, so each time we get here it's calculating how far it
-                * is currently (rather than each call being just the newly
-                * added size from the arg page).  As a result, we need to
-                * always add the entire size of the pointers, so that on the
-                * last call to get_arg_page() we'll actually have the entire
-                * correct size.
-                */
-               ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
-               if (ptr_size > ULONG_MAX - size)
-                       goto fail;
-               size += ptr_size;
-
-               acct_arg_size(bprm, size / PAGE_SIZE);
-
-               /*
-                * We've historically supported up to 32 pages (ARG_MAX)
-                * of argument strings even with small stacks
-                */
-               if (size <= ARG_MAX)
-                       return page;
-
-               /*
-                * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
-                * (whichever is smaller) for the argv+env strings.
-                * This ensures that:
-                *  - the remaining binfmt code will not run out of stack space,
-                *  - the program will have a reasonable amount of stack left
-                *    to work from.
-                */
-               limit = _STK_LIM / 4 * 3;
-               limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
-               if (size > limit)
-                       goto fail;
-       }
+       if (write)
+               acct_arg_size(bprm, vma_pages(bprm->vma));
 
        return page;
-
-fail:
-       put_page(page);
-       return NULL;
 }
 
 static void put_arg_page(struct page *page)
@@ -492,6 +447,50 @@ static int count(struct user_arg_ptr argv, int max)
        return i;
 }
 
+static int prepare_arg_pages(struct linux_binprm *bprm,
+                       struct user_arg_ptr argv, struct user_arg_ptr envp)
+{
+       unsigned long limit, ptr_size;
+
+       bprm->argc = count(argv, MAX_ARG_STRINGS);
+       if (bprm->argc < 0)
+               return bprm->argc;
+
+       bprm->envc = count(envp, MAX_ARG_STRINGS);
+       if (bprm->envc < 0)
+               return bprm->envc;
+
+       /*
+        * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
+        * (whichever is smaller) for the argv+env strings.
+        * This ensures that:
+        *  - the remaining binfmt code will not run out of stack space,
+        *  - the program will have a reasonable amount of stack left
+        *    to work from.
+        */
+       limit = _STK_LIM / 4 * 3;
+       limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
+       /*
+        * We've historically supported up to 32 pages (ARG_MAX)
+        * of argument strings even with small stacks
+        */
+       limit = max_t(unsigned long, limit, ARG_MAX);
+       /*
+        * We must account for the size of all the argv and envp pointers to
+        * the argv and envp strings, since they will also take up space in
+        * the stack. They aren't stored until much later when we can't
+        * signal to the parent that the child has run out of stack space.
+        * Instead, calculate it here so it's possible to fail gracefully.
+        */
+       ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+       if (limit <= ptr_size)
+               return -E2BIG;
+       limit -= ptr_size;
+
+       bprm->argmin = bprm->p - limit;
+       return 0;
+}
+
 /*
  * 'copy_strings()' copies argument/environment strings from the old
  * processes's memory to the new process's stack.  The call to get_user_pages()
@@ -527,6 +526,10 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
                pos = bprm->p;
                str += len;
                bprm->p -= len;
+#ifdef CONFIG_MMU
+               if (bprm->p < bprm->argmin)
+                       goto out;
+#endif
 
                while (len > 0) {
                        int offset, bytes_to_copy;
@@ -1789,12 +1792,8 @@ static int __do_execve_file(int fd, struct filename *filename,
        if (retval)
                goto out_unmark;
 
-       bprm->argc = count(argv, MAX_ARG_STRINGS);
-       if ((retval = bprm->argc) < 0)
-               goto out;
-
-       bprm->envc = count(envp, MAX_ARG_STRINGS);
-       if ((retval = bprm->envc) < 0)
+       retval = prepare_arg_pages(bprm, argv, envp);
+       if (retval < 0)
                goto out;
 
        retval = prepare_binprm(bprm);
index e9f5fe69df312d022648dd69e21ca00d2daf45a5..03200a8c01787c68abcb63c1d8ea267de40a8093 100644 (file)
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -25,6 +25,7 @@ struct linux_binprm {
 #endif
        struct mm_struct *mm;
        unsigned long p; /* current top of mem */
+       unsigned long argmin; /* rlimit marker for copy_strings() */
        unsigned int
                /*
                 * True after the bprm_set_creds hook has been called once