Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph...
[platform/kernel/linux-rpi.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40
/*
 * Fallback type names: each one is only defined here when nothing earlier
 * has already provided a macro of the same name.
 */
#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif
47
/*
 * Forward declarations: these routines are referenced (e.g. from the
 * elf_format initializer) before their definitions appear.
 */
static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);
52
/*
 * If we don't support core dumping (CONFIG_ELF_CORE unset), elf_core_dump
 * expands to NULL, so the .core_dump hook in elf_format is left empty and
 * no dump is ever attempted through it.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif
62
/*
 * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; it is the
 * granularity used by the ELF_PAGE* helpers below.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif
68
/* Default to 0 when nothing earlier has defined ELF_CORE_EFLAGS. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif
72
/*
 * Address helpers working in units of ELF_MIN_ALIGN (mask arithmetic, so
 * they assume ELF_MIN_ALIGN is a power of two):
 *   ELF_PAGESTART  - round _v down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET - offset of _v within its ELF_MIN_ALIGN-sized page
 *   ELF_PAGEALIGN  - round _v up to an ELF_MIN_ALIGN boundary
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
76
/*
 * Binary-format descriptor for ELF: wires the executable loader, the
 * shared-library loader and the core-dump hook (NULL when core dumping is
 * not configured) into a struct linux_binfmt.
 */
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};
84
/*
 * True when x, viewed as an unsigned long, lies at or above TASK_SIZE.
 * Used in this file both on computed user addresses and on the values
 * returned by vm_brk()/vm_mmap()/elf_map().
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
86
87 static int set_brk(unsigned long start, unsigned long end)
88 {
89         start = ELF_PAGEALIGN(start);
90         end = ELF_PAGEALIGN(end);
91         if (end > start) {
92                 unsigned long addr;
93                 addr = vm_brk(start, end - start);
94                 if (BAD_ADDR(addr))
95                         return addr;
96         }
97         current->mm->start_brk = current->mm->brk = end;
98         return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  This would
103    contain the junk from the file that should not
104    be in memory
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108         unsigned long nbyte;
109
110         nbyte = ELF_PAGEOFFSET(elf_bss);
111         if (nbyte) {
112                 nbyte = ELF_MIN_ALIGN - nbyte;
113                 if (clear_user((void __user *) elf_bss, nbyte))
114                         return -EFAULT;
115         }
116         return 0;
117 }
118
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - address 'items' elf_addr_t slots further along
 *                            the stack in its growth direction
 *   STACK_ROUND(sp, items) - address 'items' slots along from sp, rounded to
 *                            a 16-byte boundary in the growth direction
 *   STACK_ALLOC(sp, len)   - advance sp by len in the growth direction and
 *                            yield the start of the reserved area
 *
 * Every argument is parenthesized so that expression arguments (e.g.
 * "a + b") are evaluated as a unit.  Note that sp is still evaluated more
 * than once in STACK_ALLOC, so it must be a side-effect-free lvalue.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 * With the NULL default below, create_elf_tables() copies nothing and
 * emits no AT_BASE_PLATFORM entry.
 */
#define ELF_BASE_PLATFORM NULL
#endif
142
/*
 * create_elf_tables - lay out the initial user stack for a freshly loaded
 * ELF image.
 *
 * Starting from bprm->p, this copies the platform string(s) and 16 random
 * bytes onto the user stack, builds the auxiliary vector in
 * current->mm->saved_auxv, and then writes argc, the argv[] and envp[]
 * pointer arrays (each NULL-terminated) and the auxiliary vector at the
 * final stack pointer.  bprm->p is updated to that 16-byte-rounded stack
 * pointer, and mm->arg_end, mm->env_start and mm->env_end are filled in
 * while walking the strings that begin at mm->arg_start.
 *
 * @bprm:             binprm describing the exec in progress
 * @exec:             ELF header of the executable (e_phoff, e_phnum and
 *                    e_entry are read for the auxiliary vector)
 * @load_addr:        added to exec->e_phoff to form AT_PHDR
 * @interp_load_addr: reported as AT_BASE
 *
 * Returns 0 on success, -EFAULT if a user-space access or the stack
 * extension fails, or -EINVAL if strnlen_user() reports a zero length or a
 * length above MAX_ARG_STRLEN for an argument/environment string.
 */
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                /* Length includes the terminating NUL. */
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                /* Length includes the terminating NUL. */
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         * Their user address is published below as AT_RANDOM.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /*
         * Create the ELF interpreter info.  The vector is assembled in the
         * kernel-side mm->saved_auxv array first and copied to the user
         * stack at the very end of this function.
         */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
        /* Each entry occupies two consecutive slots: the id, then the value. */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        /* The platform entries are only emitted when a string was copied. */
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        /* Reserve room for the auxiliary vector (ei_index slots). */
        sp = STACK_ADD(p, ei_index);

        /* argv[] plus NULL, envp[] plus NULL, plus one slot for argc. */
        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        /* envp[] starts right after argv[]'s NULL terminator. */
        envp = argv + argc + 1;

        /*
         * Populate argv and envp.  From here on p no longer tracks the stack
         * pointer: it walks the strings starting at mm->arg_start, advancing
         * by each string's strnlen_user() length.
         */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        /* NULL-terminate argv[]. */
        if (__put_user(0, argv))
                return -EFAULT;
        /* The environment strings start where the argument strings end. */
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        /* NULL-terminate envp[]. */
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /*
         * Put the elf_info on the stack in the right place: the slot just
         * past envp[]'s NULL terminator.
         */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
327
328 #ifndef elf_map
329
330 static unsigned long elf_map(struct file *filep, unsigned long addr,
331                 struct elf_phdr *eppnt, int prot, int type,
332                 unsigned long total_size)
333 {
334         unsigned long map_addr;
335         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
336         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
337         addr = ELF_PAGESTART(addr);
338         size = ELF_PAGEALIGN(size);
339
340         /* mmap() will return -EINVAL if given a zero size, but a
341          * segment with zero filesize is perfectly valid */
342         if (!size)
343                 return addr;
344
345         /*
346         * total_size is the size of the ELF (interpreter) image.
347         * The _first_ mmap needs to know the full size, otherwise
348         * randomization might put this image into an overlapping
349         * position with the ELF binary image. (since size < total_size)
350         * So we first map the 'big' image - and unmap the remainder at
351         * the end. (which unmap is needed for ELF images with holes.)
352         */
353         if (total_size) {
354                 total_size = ELF_PAGEALIGN(total_size);
355                 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
356                 if (!BAD_ADDR(map_addr))
357                         vm_munmap(map_addr+size, total_size-size);
358         } else
359                 map_addr = vm_mmap(filep, addr, size, prot, type, off);
360
361         return(map_addr);
362 }
363
364 #endif /* !elf_map */
365
366 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
367 {
368         int i, first_idx = -1, last_idx = -1;
369
370         for (i = 0; i < nr; i++) {
371                 if (cmds[i].p_type == PT_LOAD) {
372                         last_idx = i;
373                         if (first_idx == -1)
374                                 first_idx = i;
375                 }
376         }
377         if (first_idx == -1)
378                 return 0;
379
380         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
381                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
382 }
383
384
385 /* This is much more generalized than the library routine read function,
386    so we keep this separate.  Technically the library read function
387    is only provided so that we can read a.out libraries that have
388    an ELF header */
389
390 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
391                 struct file *interpreter, unsigned long *interp_map_addr,
392                 unsigned long no_base)
393 {
394         struct elf_phdr *elf_phdata;
395         struct elf_phdr *eppnt;
396         unsigned long load_addr = 0;
397         int load_addr_set = 0;
398         unsigned long last_bss = 0, elf_bss = 0;
399         unsigned long error = ~0UL;
400         unsigned long total_size;
401         int retval, i, size;
402
403         /* First of all, some simple consistency checks */
404         if (interp_elf_ex->e_type != ET_EXEC &&
405             interp_elf_ex->e_type != ET_DYN)
406                 goto out;
407         if (!elf_check_arch(interp_elf_ex))
408                 goto out;
409         if (!interpreter->f_op || !interpreter->f_op->mmap)
410                 goto out;
411
412         /*
413          * If the size of this structure has changed, then punt, since
414          * we will be doing the wrong thing.
415          */
416         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
417                 goto out;
418         if (interp_elf_ex->e_phnum < 1 ||
419                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
420                 goto out;
421
422         /* Now read in all of the header information */
423         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
424         if (size > ELF_MIN_ALIGN)
425                 goto out;
426         elf_phdata = kmalloc(size, GFP_KERNEL);
427         if (!elf_phdata)
428                 goto out;
429
430         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
431                              (char *)elf_phdata, size);
432         error = -EIO;
433         if (retval != size) {
434                 if (retval < 0)
435                         error = retval; 
436                 goto out_close;
437         }
438
439         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
440         if (!total_size) {
441                 error = -EINVAL;
442                 goto out_close;
443         }
444
445         eppnt = elf_phdata;
446         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
447                 if (eppnt->p_type == PT_LOAD) {
448                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
449                         int elf_prot = 0;
450                         unsigned long vaddr = 0;
451                         unsigned long k, map_addr;
452
453                         if (eppnt->p_flags & PF_R)
454                                 elf_prot = PROT_READ;
455                         if (eppnt->p_flags & PF_W)
456                                 elf_prot |= PROT_WRITE;
457                         if (eppnt->p_flags & PF_X)
458                                 elf_prot |= PROT_EXEC;
459                         vaddr = eppnt->p_vaddr;
460                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
461                                 elf_type |= MAP_FIXED;
462                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
463                                 load_addr = -vaddr;
464
465                         map_addr = elf_map(interpreter, load_addr + vaddr,
466                                         eppnt, elf_prot, elf_type, total_size);
467                         total_size = 0;
468                         if (!*interp_map_addr)
469                                 *interp_map_addr = map_addr;
470                         error = map_addr;
471                         if (BAD_ADDR(map_addr))
472                                 goto out_close;
473
474                         if (!load_addr_set &&
475                             interp_elf_ex->e_type == ET_DYN) {
476                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
477                                 load_addr_set = 1;
478                         }
479
480                         /*
481                          * Check to see if the section's size will overflow the
482                          * allowed task size. Note that p_filesz must always be
483                          * <= p_memsize so it's only necessary to check p_memsz.
484                          */
485                         k = load_addr + eppnt->p_vaddr;
486                         if (BAD_ADDR(k) ||
487                             eppnt->p_filesz > eppnt->p_memsz ||
488                             eppnt->p_memsz > TASK_SIZE ||
489                             TASK_SIZE - eppnt->p_memsz < k) {
490                                 error = -ENOMEM;
491                                 goto out_close;
492                         }
493
494                         /*
495                          * Find the end of the file mapping for this phdr, and
496                          * keep track of the largest address we see for this.
497                          */
498                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
499                         if (k > elf_bss)
500                                 elf_bss = k;
501
502                         /*
503                          * Do the same thing for the memory mapping - between
504                          * elf_bss and last_bss is the bss section.
505                          */
506                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
507                         if (k > last_bss)
508                                 last_bss = k;
509                 }
510         }
511
512         if (last_bss > elf_bss) {
513                 /*
514                  * Now fill out the bss section.  First pad the last page up
515                  * to the page boundary, and then perform a mmap to make sure
516                  * that there are zero-mapped pages up to and including the
517                  * last bss page.
518                  */
519                 if (padzero(elf_bss)) {
520                         error = -EFAULT;
521                         goto out_close;
522                 }
523
524                 /* What we have mapped so far */
525                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
526
527                 /* Map the last of the bss segment */
528                 error = vm_brk(elf_bss, last_bss - elf_bss);
529                 if (BAD_ADDR(error))
530                         goto out_close;
531         }
532
533         error = load_addr;
534
535 out_close:
536         kfree(elf_phdata);
537 out:
538         return error;
539 }
540
/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

/* NOTE(review): no user of these two constants is visible in this part of the file. */
#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

/*
 * Default mask (in pages) for the random stack offset computed in
 * randomize_stack_top(); only defined here when nothing earlier has
 * provided STACK_RND_MASK.
 */
#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
552
553 static unsigned long randomize_stack_top(unsigned long stack_top)
554 {
555         unsigned int random_variable = 0;
556
557         if ((current->flags & PF_RANDOMIZE) &&
558                 !(current->personality & ADDR_NO_RANDOMIZE)) {
559                 random_variable = get_random_int() & STACK_RND_MASK;
560                 random_variable <<= PAGE_SHIFT;
561         }
562 #ifdef CONFIG_STACK_GROWSUP
563         return PAGE_ALIGN(stack_top) + random_variable;
564 #else
565         return PAGE_ALIGN(stack_top) - random_variable;
566 #endif
567 }
568
569 static int load_elf_binary(struct linux_binprm *bprm)
570 {
571         struct file *interpreter = NULL; /* to shut gcc up */
572         unsigned long load_addr = 0, load_bias = 0;
573         int load_addr_set = 0;
574         char * elf_interpreter = NULL;
575         unsigned long error;
576         struct elf_phdr *elf_ppnt, *elf_phdata;
577         unsigned long elf_bss, elf_brk;
578         int retval, i;
579         unsigned int size;
580         unsigned long elf_entry;
581         unsigned long interp_load_addr = 0;
582         unsigned long start_code, end_code, start_data, end_data;
583         unsigned long reloc_func_desc __maybe_unused = 0;
584         int executable_stack = EXSTACK_DEFAULT;
585         unsigned long def_flags = 0;
586         struct pt_regs *regs = current_pt_regs();
587         struct {
588                 struct elfhdr elf_ex;
589                 struct elfhdr interp_elf_ex;
590         } *loc;
591
592         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
593         if (!loc) {
594                 retval = -ENOMEM;
595                 goto out_ret;
596         }
597         
598         /* Get the exec-header */
599         loc->elf_ex = *((struct elfhdr *)bprm->buf);
600
601         retval = -ENOEXEC;
602         /* First of all, some simple consistency checks */
603         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
604                 goto out;
605
606         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
607                 goto out;
608         if (!elf_check_arch(&loc->elf_ex))
609                 goto out;
610         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
611                 goto out;
612
613         /* Now read in all of the header information */
614         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
615                 goto out;
616         if (loc->elf_ex.e_phnum < 1 ||
617                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
618                 goto out;
619         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
620         retval = -ENOMEM;
621         elf_phdata = kmalloc(size, GFP_KERNEL);
622         if (!elf_phdata)
623                 goto out;
624
625         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
626                              (char *)elf_phdata, size);
627         if (retval != size) {
628                 if (retval >= 0)
629                         retval = -EIO;
630                 goto out_free_ph;
631         }
632
633         elf_ppnt = elf_phdata;
634         elf_bss = 0;
635         elf_brk = 0;
636
637         start_code = ~0UL;
638         end_code = 0;
639         start_data = 0;
640         end_data = 0;
641
642         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
643                 if (elf_ppnt->p_type == PT_INTERP) {
644                         /* This is the program interpreter used for
645                          * shared libraries - for now assume that this
646                          * is an a.out format binary
647                          */
648                         retval = -ENOEXEC;
649                         if (elf_ppnt->p_filesz > PATH_MAX || 
650                             elf_ppnt->p_filesz < 2)
651                                 goto out_free_ph;
652
653                         retval = -ENOMEM;
654                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
655                                                   GFP_KERNEL);
656                         if (!elf_interpreter)
657                                 goto out_free_ph;
658
659                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
660                                              elf_interpreter,
661                                              elf_ppnt->p_filesz);
662                         if (retval != elf_ppnt->p_filesz) {
663                                 if (retval >= 0)
664                                         retval = -EIO;
665                                 goto out_free_interp;
666                         }
667                         /* make sure path is NULL terminated */
668                         retval = -ENOEXEC;
669                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
670                                 goto out_free_interp;
671
672                         interpreter = open_exec(elf_interpreter);
673                         retval = PTR_ERR(interpreter);
674                         if (IS_ERR(interpreter))
675                                 goto out_free_interp;
676
677                         /*
678                          * If the binary is not readable then enforce
679                          * mm->dumpable = 0 regardless of the interpreter's
680                          * permissions.
681                          */
682                         would_dump(bprm, interpreter);
683
684                         retval = kernel_read(interpreter, 0, bprm->buf,
685                                              BINPRM_BUF_SIZE);
686                         if (retval != BINPRM_BUF_SIZE) {
687                                 if (retval >= 0)
688                                         retval = -EIO;
689                                 goto out_free_dentry;
690                         }
691
692                         /* Get the exec headers */
693                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
694                         break;
695                 }
696                 elf_ppnt++;
697         }
698
699         elf_ppnt = elf_phdata;
700         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
701                 if (elf_ppnt->p_type == PT_GNU_STACK) {
702                         if (elf_ppnt->p_flags & PF_X)
703                                 executable_stack = EXSTACK_ENABLE_X;
704                         else
705                                 executable_stack = EXSTACK_DISABLE_X;
706                         break;
707                 }
708
709         /* Some simple consistency checks for the interpreter */
710         if (elf_interpreter) {
711                 retval = -ELIBBAD;
712                 /* Not an ELF interpreter */
713                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
714                         goto out_free_dentry;
715                 /* Verify the interpreter has a valid arch */
716                 if (!elf_check_arch(&loc->interp_elf_ex))
717                         goto out_free_dentry;
718         }
719
720         /* Flush all traces of the currently running executable */
721         retval = flush_old_exec(bprm);
722         if (retval)
723                 goto out_free_dentry;
724
725         /* OK, This is the point of no return */
726         current->mm->def_flags = def_flags;
727
728         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
729            may depend on the personality.  */
730         SET_PERSONALITY(loc->elf_ex);
731         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
732                 current->personality |= READ_IMPLIES_EXEC;
733
734         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
735                 current->flags |= PF_RANDOMIZE;
736
737         setup_new_exec(bprm);
738
739         /* Do this so that we can load the interpreter, if need be.  We will
740            change some of these later */
741         current->mm->free_area_cache = current->mm->mmap_base;
742         current->mm->cached_hole_size = 0;
743         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
744                                  executable_stack);
745         if (retval < 0) {
746                 send_sig(SIGKILL, current, 0);
747                 goto out_free_dentry;
748         }
749         
750         current->mm->start_stack = bprm->p;
751
752         /* Now we do a little grungy work by mmapping the ELF image into
753            the correct location in memory. */
754         for(i = 0, elf_ppnt = elf_phdata;
755             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
756                 int elf_prot = 0, elf_flags;
757                 unsigned long k, vaddr;
758
759                 if (elf_ppnt->p_type != PT_LOAD)
760                         continue;
761
762                 if (unlikely (elf_brk > elf_bss)) {
763                         unsigned long nbyte;
764                     
765                         /* There was a PT_LOAD segment with p_memsz > p_filesz
766                            before this one. Map anonymous pages, if needed,
767                            and clear the area.  */
768                         retval = set_brk(elf_bss + load_bias,
769                                          elf_brk + load_bias);
770                         if (retval) {
771                                 send_sig(SIGKILL, current, 0);
772                                 goto out_free_dentry;
773                         }
774                         nbyte = ELF_PAGEOFFSET(elf_bss);
775                         if (nbyte) {
776                                 nbyte = ELF_MIN_ALIGN - nbyte;
777                                 if (nbyte > elf_brk - elf_bss)
778                                         nbyte = elf_brk - elf_bss;
779                                 if (clear_user((void __user *)elf_bss +
780                                                         load_bias, nbyte)) {
781                                         /*
782                                          * This bss-zeroing can fail if the ELF
783                                          * file specifies odd protections. So
784                                          * we don't check the return value
785                                          */
786                                 }
787                         }
788                 }
789
790                 if (elf_ppnt->p_flags & PF_R)
791                         elf_prot |= PROT_READ;
792                 if (elf_ppnt->p_flags & PF_W)
793                         elf_prot |= PROT_WRITE;
794                 if (elf_ppnt->p_flags & PF_X)
795                         elf_prot |= PROT_EXEC;
796
797                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
798
799                 vaddr = elf_ppnt->p_vaddr;
800                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
801                         elf_flags |= MAP_FIXED;
802                 } else if (loc->elf_ex.e_type == ET_DYN) {
803                         /* Try and get dynamic programs out of the way of the
804                          * default mmap base, as well as whatever program they
805                          * might try to exec.  This is because the brk will
806                          * follow the loader, and is not movable.  */
807 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
808                         /* Memory randomization might have been switched off
809                          * in runtime via sysctl or explicit setting of
810                          * personality flags.
811                          * If that is the case, retain the original non-zero
812                          * load_bias value in order to establish proper
813                          * non-randomized mappings.
814                          */
815                         if (current->flags & PF_RANDOMIZE)
816                                 load_bias = 0;
817                         else
818                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
819 #else
820                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
821 #endif
822                 }
823
824                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
825                                 elf_prot, elf_flags, 0);
826                 if (BAD_ADDR(error)) {
827                         send_sig(SIGKILL, current, 0);
828                         retval = IS_ERR((void *)error) ?
829                                 PTR_ERR((void*)error) : -EINVAL;
830                         goto out_free_dentry;
831                 }
832
833                 if (!load_addr_set) {
834                         load_addr_set = 1;
835                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
836                         if (loc->elf_ex.e_type == ET_DYN) {
837                                 load_bias += error -
838                                              ELF_PAGESTART(load_bias + vaddr);
839                                 load_addr += load_bias;
840                                 reloc_func_desc = load_bias;
841                         }
842                 }
843                 k = elf_ppnt->p_vaddr;
844                 if (k < start_code)
845                         start_code = k;
846                 if (start_data < k)
847                         start_data = k;
848
849                 /*
850                  * Check to see if the section's size will overflow the
851                  * allowed task size. Note that p_filesz must always be
852                  * <= p_memsz so it is only necessary to check p_memsz.
853                  */
854                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
855                     elf_ppnt->p_memsz > TASK_SIZE ||
856                     TASK_SIZE - elf_ppnt->p_memsz < k) {
857                         /* set_brk can never work. Avoid overflows. */
858                         send_sig(SIGKILL, current, 0);
859                         retval = -EINVAL;
860                         goto out_free_dentry;
861                 }
862
863                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
864
865                 if (k > elf_bss)
866                         elf_bss = k;
867                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
868                         end_code = k;
869                 if (end_data < k)
870                         end_data = k;
871                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
872                 if (k > elf_brk)
873                         elf_brk = k;
874         }
875
876         loc->elf_ex.e_entry += load_bias;
877         elf_bss += load_bias;
878         elf_brk += load_bias;
879         start_code += load_bias;
880         end_code += load_bias;
881         start_data += load_bias;
882         end_data += load_bias;
883
884         /* Calling set_brk effectively mmaps the pages that we need
885          * for the bss and break sections.  We must do this before
886          * mapping in the interpreter, to make sure it doesn't wind
887          * up getting placed where the bss needs to go.
888          */
889         retval = set_brk(elf_bss, elf_brk);
890         if (retval) {
891                 send_sig(SIGKILL, current, 0);
892                 goto out_free_dentry;
893         }
894         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
895                 send_sig(SIGSEGV, current, 0);
896                 retval = -EFAULT; /* Nobody gets to see this, but.. */
897                 goto out_free_dentry;
898         }
899
900         if (elf_interpreter) {
901                 unsigned long interp_map_addr = 0;
902
903                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
904                                             interpreter,
905                                             &interp_map_addr,
906                                             load_bias);
907                 if (!IS_ERR((void *)elf_entry)) {
908                         /*
909                          * load_elf_interp() returns relocation
910                          * adjustment
911                          */
912                         interp_load_addr = elf_entry;
913                         elf_entry += loc->interp_elf_ex.e_entry;
914                 }
915                 if (BAD_ADDR(elf_entry)) {
916                         force_sig(SIGSEGV, current);
917                         retval = IS_ERR((void *)elf_entry) ?
918                                         (int)elf_entry : -EINVAL;
919                         goto out_free_dentry;
920                 }
921                 reloc_func_desc = interp_load_addr;
922
923                 allow_write_access(interpreter);
924                 fput(interpreter);
925                 kfree(elf_interpreter);
926         } else {
927                 elf_entry = loc->elf_ex.e_entry;
928                 if (BAD_ADDR(elf_entry)) {
929                         force_sig(SIGSEGV, current);
930                         retval = -EINVAL;
931                         goto out_free_dentry;
932                 }
933         }
934
935         kfree(elf_phdata);
936
937         set_binfmt(&elf_format);
938
939 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
940         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
941         if (retval < 0) {
942                 send_sig(SIGKILL, current, 0);
943                 goto out;
944         }
945 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
946
947         install_exec_creds(bprm);
948         retval = create_elf_tables(bprm, &loc->elf_ex,
949                           load_addr, interp_load_addr);
950         if (retval < 0) {
951                 send_sig(SIGKILL, current, 0);
952                 goto out;
953         }
954         /* N.B. passed_fileno might not be initialized? */
955         current->mm->end_code = end_code;
956         current->mm->start_code = start_code;
957         current->mm->start_data = start_data;
958         current->mm->end_data = end_data;
959         current->mm->start_stack = bprm->p;
960
961 #ifdef arch_randomize_brk
962         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
963                 current->mm->brk = current->mm->start_brk =
964                         arch_randomize_brk(current->mm);
965 #ifdef CONFIG_COMPAT_BRK
966                 current->brk_randomized = 1;
967 #endif
968         }
969 #endif
970
971         if (current->personality & MMAP_PAGE_ZERO) {
972                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
973                    and some applications "depend" upon this behavior.
974                    Since we do not have the power to recompile these, we
975                    emulate the SVr4 behavior. Sigh. */
976                 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
977                                 MAP_FIXED | MAP_PRIVATE, 0);
978         }
979
980 #ifdef ELF_PLAT_INIT
981         /*
982          * The ABI may specify that certain registers be set up in special
983          * ways (on i386 %edx is the address of a DT_FINI function, for
984          * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
985          * that the e_entry field is the address of the function descriptor
986          * for the startup routine, rather than the address of the startup
987          * routine itself.  This macro performs whatever initialization to
988          * the regs structure is required as well as any relocations to the
989          * function descriptor entries when executing dynamically linked apps.
990          */
991         ELF_PLAT_INIT(regs, reloc_func_desc);
992 #endif
993
994         start_thread(regs, elf_entry, bprm->p);
995         retval = 0;
996 out:
997         kfree(loc);
998 out_ret:
999         return retval;
1000
1001         /* error cleanup */
1002 out_free_dentry:
1003         allow_write_access(interpreter);
1004         if (interpreter)
1005                 fput(interpreter);
1006 out_free_interp:
1007         kfree(elf_interpreter);
1008 out_free_ph:
1009         kfree(elf_phdata);
1010         goto out;
1011 }
1012
1013 /* This is really simpleminded and specialized - we are loading an
1014    a.out library that is given an ELF header. */
1015 static int load_elf_library(struct file *file)
1016 {
1017         struct elf_phdr *elf_phdata;
1018         struct elf_phdr *eppnt;
1019         unsigned long elf_bss, bss, len;
1020         int retval, error, i, j;
1021         struct elfhdr elf_ex;
1022
1023         error = -ENOEXEC;
1024         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1025         if (retval != sizeof(elf_ex))
1026                 goto out;
1027
1028         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1029                 goto out;
1030
1031         /* First of all, some simple consistency checks */
1032         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1033             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1034                 goto out;
1035
1036         /* Now read in all of the header information */
1037
1038         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1039         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1040
1041         error = -ENOMEM;
1042         elf_phdata = kmalloc(j, GFP_KERNEL);
1043         if (!elf_phdata)
1044                 goto out;
1045
1046         eppnt = elf_phdata;
1047         error = -ENOEXEC;
1048         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1049         if (retval != j)
1050                 goto out_free_ph;
1051
1052         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1053                 if ((eppnt + i)->p_type == PT_LOAD)
1054                         j++;
1055         if (j != 1)
1056                 goto out_free_ph;
1057
1058         while (eppnt->p_type != PT_LOAD)
1059                 eppnt++;
1060
1061         /* Now use mmap to map the library into memory. */
1062         error = vm_mmap(file,
1063                         ELF_PAGESTART(eppnt->p_vaddr),
1064                         (eppnt->p_filesz +
1065                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1066                         PROT_READ | PROT_WRITE | PROT_EXEC,
1067                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1068                         (eppnt->p_offset -
1069                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1070         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1071                 goto out_free_ph;
1072
1073         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1074         if (padzero(elf_bss)) {
1075                 error = -EFAULT;
1076                 goto out_free_ph;
1077         }
1078
1079         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1080                             ELF_MIN_ALIGN - 1);
1081         bss = eppnt->p_memsz + eppnt->p_vaddr;
1082         if (bss > len)
1083                 vm_brk(len, bss - len);
1084         error = 0;
1085
1086 out_free_ph:
1087         kfree(elf_phdata);
1088 out:
1089         return error;
1090 }
1091
1092 #ifdef CONFIG_ELF_CORE
1093 /*
1094  * ELF core dumper
1095  *
1096  * Modelled on fs/exec.c:aout_core_dump()
1097  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1098  */
1099
1100 /*
1101  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1102  * that are useful for post-mortem analysis are included in every core dump.
1103  * In that way we ensure that the core dump is fully interpretable later
1104  * without matching up the same kernel and hardware config to see what PC values
1105  * meant. These special mappings include - vDSO, vsyscall, and other
1106  * architecture specific mappings
1107  */
1108 static bool always_dump_vma(struct vm_area_struct *vma)
1109 {
1110         /* Any vsyscall mappings? */
1111         if (vma == get_gate_vma(vma->vm_mm))
1112                 return true;
1113         /*
1114          * arch_vma_name() returns non-NULL for special architecture mappings,
1115          * such as vDSO sections.
1116          */
1117         if (arch_vma_name(vma))
1118                 return true;
1119
1120         return false;
1121 }
1122
1123 /*
1124  * Decide what to dump of a segment, part, all or none.
1125  */
1126 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1127                                    unsigned long mm_flags)
1128 {
1129 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1130
1131         /* always dump the vdso and vsyscall sections */
1132         if (always_dump_vma(vma))
1133                 goto whole;
1134
1135         if (vma->vm_flags & VM_DONTDUMP)
1136                 return 0;
1137
1138         /* Hugetlb memory check */
1139         if (vma->vm_flags & VM_HUGETLB) {
1140                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1141                         goto whole;
1142                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1143                         goto whole;
1144                 return 0;
1145         }
1146
1147         /* Do not dump I/O mapped devices or special mappings */
1148         if (vma->vm_flags & VM_IO)
1149                 return 0;
1150
1151         /* By default, dump shared memory if mapped from an anonymous file. */
1152         if (vma->vm_flags & VM_SHARED) {
1153                 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1154                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1155                         goto whole;
1156                 return 0;
1157         }
1158
1159         /* Dump segments that have been written to.  */
1160         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1161                 goto whole;
1162         if (vma->vm_file == NULL)
1163                 return 0;
1164
1165         if (FILTER(MAPPED_PRIVATE))
1166                 goto whole;
1167
1168         /*
1169          * If this looks like the beginning of a DSO or executable mapping,
1170          * check for an ELF header.  If we find one, dump the first page to
1171          * aid in determining what was mapped here.
1172          */
1173         if (FILTER(ELF_HEADERS) &&
1174             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1175                 u32 __user *header = (u32 __user *) vma->vm_start;
1176                 u32 word;
1177                 mm_segment_t fs = get_fs();
1178                 /*
1179                  * Doing it this way gets the constant folded by GCC.
1180                  */
1181                 union {
1182                         u32 cmp;
1183                         char elfmag[SELFMAG];
1184                 } magic;
1185                 BUILD_BUG_ON(SELFMAG != sizeof word);
1186                 magic.elfmag[EI_MAG0] = ELFMAG0;
1187                 magic.elfmag[EI_MAG1] = ELFMAG1;
1188                 magic.elfmag[EI_MAG2] = ELFMAG2;
1189                 magic.elfmag[EI_MAG3] = ELFMAG3;
1190                 /*
1191                  * Switch to the user "segment" for get_user(),
1192                  * then put back what elf_core_dump() had in place.
1193                  */
1194                 set_fs(USER_DS);
1195                 if (unlikely(get_user(word, header)))
1196                         word = 0;
1197                 set_fs(fs);
1198                 if (word == magic.cmp)
1199                         return PAGE_SIZE;
1200         }
1201
1202 #undef  FILTER
1203
1204         return 0;
1205
1206 whole:
1207         return vma->vm_end - vma->vm_start;
1208 }
1209
1210 /* An ELF note in memory, described by pointers until it is written out */
1211 struct memelfnote
1212 {
1213         const char *name;      /* NUL-terminated note name; sized with strlen() */
1214         int type;              /* note type; becomes n_type in the note header */
1215         unsigned int datasz;   /* payload length in bytes; becomes n_descsz */
1216         void *data;            /* payload, datasz bytes long */
1217 };
1218
1219 static int notesize(struct memelfnote *en)
1220 {
1221         int sz;
1222
1223         sz = sizeof(struct elf_note);
1224         sz += roundup(strlen(en->name) + 1, 4);
1225         sz += roundup(en->datasz, 4);
1226
1227         return sz;
1228 }
1229
1230 #define DUMP_WRITE(addr, nr, foffset)   \
1231         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1232
1233 static int alignfile(struct file *file, loff_t *foffset)
1234 {
1235         static const char buf[4] = { 0, };
1236         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1237         return 1;
1238 }
1239
1240 static int writenote(struct memelfnote *men, struct file *file,
1241                         loff_t *foffset)
1242 {
1243         struct elf_note en;
1244         en.n_namesz = strlen(men->name) + 1;
1245         en.n_descsz = men->datasz;
1246         en.n_type = men->type;
1247
1248         DUMP_WRITE(&en, sizeof(en), foffset);
1249         DUMP_WRITE(men->name, en.n_namesz, foffset);
1250         if (!alignfile(file, foffset))
1251                 return 0;
1252         DUMP_WRITE(men->data, men->datasz, foffset);
1253         if (!alignfile(file, foffset))
1254                 return 0;
1255
1256         return 1;
1257 }
1258 #undef DUMP_WRITE
1259
1260 static void fill_elf_header(struct elfhdr *elf, int segs,
1261                             u16 machine, u32 flags)
1262 {
1263         memset(elf, 0, sizeof(*elf));
1264
1265         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1266         elf->e_ident[EI_CLASS] = ELF_CLASS;
1267         elf->e_ident[EI_DATA] = ELF_DATA;
1268         elf->e_ident[EI_VERSION] = EV_CURRENT;
1269         elf->e_ident[EI_OSABI] = ELF_OSABI;
1270
1271         elf->e_type = ET_CORE;
1272         elf->e_machine = machine;
1273         elf->e_version = EV_CURRENT;
1274         elf->e_phoff = sizeof(struct elfhdr);
1275         elf->e_flags = flags;
1276         elf->e_ehsize = sizeof(struct elfhdr);
1277         elf->e_phentsize = sizeof(struct elf_phdr);
1278         elf->e_phnum = segs;
1279
1280         return;
1281 }
1282
1283 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1284 {
1285         phdr->p_type = PT_NOTE;
1286         phdr->p_offset = offset;
1287         phdr->p_vaddr = 0;
1288         phdr->p_paddr = 0;
1289         phdr->p_filesz = sz;
1290         phdr->p_memsz = 0;
1291         phdr->p_flags = 0;
1292         phdr->p_align = 0;
1293         return;
1294 }
1295
1296 static void fill_note(struct memelfnote *note, const char *name, int type, 
1297                 unsigned int sz, void *data)
1298 {
1299         note->name = name;
1300         note->type = type;
1301         note->datasz = sz;
1302         note->data = data;
1303         return;
1304 }
1305
1306 /*
1307  * fill up all the fields in prstatus from the given task struct, except
1308  * registers which need to be filled up separately.
1309  */
1310 static void fill_prstatus(struct elf_prstatus *prstatus,
1311                 struct task_struct *p, long signr)
1312 {
1313         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1314         prstatus->pr_sigpend = p->pending.signal.sig[0];
1315         prstatus->pr_sighold = p->blocked.sig[0];
1316         rcu_read_lock();
1317         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1318         rcu_read_unlock();
1319         prstatus->pr_pid = task_pid_vnr(p);
1320         prstatus->pr_pgrp = task_pgrp_vnr(p);
1321         prstatus->pr_sid = task_session_vnr(p);
1322         if (thread_group_leader(p)) {
1323                 struct task_cputime cputime;
1324
1325                 /*
1326                  * This is the record for the group leader.  It shows the
1327                  * group-wide total, not its individual thread total.
1328                  */
1329                 thread_group_cputime(p, &cputime);
1330                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1331                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1332         } else {
1333                 cputime_t utime, stime;
1334
1335                 task_cputime(p, &utime, &stime);
1336                 cputime_to_timeval(utime, &prstatus->pr_utime);
1337                 cputime_to_timeval(stime, &prstatus->pr_stime);
1338         }
1339         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1340         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1341 }
1342
1343 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1344                        struct mm_struct *mm)
1345 {
1346         const struct cred *cred;
1347         unsigned int i, len;
1348         
1349         /* first copy the parameters from user space */
1350         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1351
1352         len = mm->arg_end - mm->arg_start;
1353         if (len >= ELF_PRARGSZ)
1354                 len = ELF_PRARGSZ-1;
1355         if (copy_from_user(&psinfo->pr_psargs,
1356                            (const char __user *)mm->arg_start, len))
1357                 return -EFAULT;
1358         for(i = 0; i < len; i++)
1359                 if (psinfo->pr_psargs[i] == 0)
1360                         psinfo->pr_psargs[i] = ' ';
1361         psinfo->pr_psargs[len] = 0;
1362
1363         rcu_read_lock();
1364         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1365         rcu_read_unlock();
1366         psinfo->pr_pid = task_pid_vnr(p);
1367         psinfo->pr_pgrp = task_pgrp_vnr(p);
1368         psinfo->pr_sid = task_session_vnr(p);
1369
1370         i = p->state ? ffz(~p->state) + 1 : 0;
1371         psinfo->pr_state = i;
1372         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1373         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1374         psinfo->pr_nice = task_nice(p);
1375         psinfo->pr_flag = p->flags;
1376         rcu_read_lock();
1377         cred = __task_cred(p);
1378         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1379         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1380         rcu_read_unlock();
1381         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1382         
1383         return 0;
1384 }
1385
1386 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1387 {
1388         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1389         int i = 0;
1390         do
1391                 i += 2;
1392         while (auxv[i - 2] != AT_NULL);
1393         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1394 }
1395
1396 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1397                 siginfo_t *siginfo)
1398 {
1399         mm_segment_t old_fs = get_fs();
1400         set_fs(KERNEL_DS);
1401         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1402         set_fs(old_fs);
1403         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1404 }
1405
1406 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1407 /*
1408  * Format of NT_FILE note:
1409  *
1410  * long count     -- how many files are mapped
1411  * long page_size -- units for file_ofs
1412  * array of [COUNT] elements of
1413  *   long start
1414  *   long end
1415  *   long file_ofs
1416  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1417  */
1418 static void fill_files_note(struct memelfnote *note)
1419 {
1420         struct vm_area_struct *vma;
1421         unsigned count, size, names_ofs, remaining, n;
1422         user_long_t *data;
1423         user_long_t *start_end_ofs;
1424         char *name_base, *name_curpos;
1425
1426         /* *Estimated* file count and total data size needed */
1427         count = current->mm->map_count;
1428         size = count * 64;
1429
1430         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1431  alloc:
1432         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1433                 goto err;
1434         size = round_up(size, PAGE_SIZE);
1435         data = vmalloc(size);
1436         if (!data)
1437                 goto err;
1438
1439         start_end_ofs = data + 2;
1440         name_base = name_curpos = ((char *)data) + names_ofs;
1441         remaining = size - names_ofs;
1442         count = 0;
1443         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1444                 struct file *file;
1445                 const char *filename;
1446
1447                 file = vma->vm_file;
1448                 if (!file)
1449                         continue;
1450                 filename = d_path(&file->f_path, name_curpos, remaining);
1451                 if (IS_ERR(filename)) {
1452                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1453                                 vfree(data);
1454                                 size = size * 5 / 4;
1455                                 goto alloc;
1456                         }
1457                         continue;
1458                 }
1459
1460                 /* d_path() fills at the end, move name down */
1461                 /* n = strlen(filename) + 1: */
1462                 n = (name_curpos + remaining) - filename;
1463                 remaining = filename - name_curpos;
1464                 memmove(name_curpos, filename, n);
1465                 name_curpos += n;
1466
1467                 *start_end_ofs++ = vma->vm_start;
1468                 *start_end_ofs++ = vma->vm_end;
1469                 *start_end_ofs++ = vma->vm_pgoff;
1470                 count++;
1471         }
1472
1473         /* Now we know exact count of files, can store it */
1474         data[0] = count;
1475         data[1] = PAGE_SIZE;
1476         /*
1477          * Count usually is less than current->mm->map_count,
1478          * we need to move filenames down.
1479          */
1480         n = current->mm->map_count - count;
1481         if (n != 0) {
1482                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1483                 memmove(name_base - shift_bytes, name_base,
1484                         name_curpos - name_base);
1485                 name_curpos -= shift_bytes;
1486         }
1487
1488         size = name_curpos - (char *)data;
1489         fill_note(note, "CORE", NT_FILE, size, data);
1490  err: ;
1491 }
1492
1493 #ifdef CORE_DUMP_USE_REGSET
1494 #include <linux/regset.h>
1495
1496 struct elf_thread_core_info {
1497         struct elf_thread_core_info *next;
1498         struct task_struct *task;
1499         struct elf_prstatus prstatus;
1500         struct memelfnote notes[0];
1501 };
1502
/*
 * State collected for the notes of one core dump (regset flavour).
 * fill_note_info() initialises it; the per-field remarks below only state
 * what is visible there, anything further is marked as an assumption.
 */
1503 struct elf_note_info {
1504         struct elf_thread_core_info *thread;   /* starts out NULL; presumably the head of the list linked via ->next */
1505         struct memelfnote psinfo;              /* NT_PRPSINFO note; its data is kmalloc()ed */
1506         struct memelfnote signote;             /* presumably the NT_SIGINFO note - TODO confirm */
1507         struct memelfnote auxv;                /* presumably the NT_AUXV note - TODO confirm */
1508         struct memelfnote files;               /* presumably the NT_FILE note - TODO confirm */
1509         user_siginfo_t csigdata;               /* presumably backing storage for signote - TODO confirm */
1510         size_t size;                           /* starts at 0; presumably the running total of note bytes */
1511         int thread_notes;                      /* number of regsets in the view with a non-zero core_note_type */
1512 };
1513
1514 /*
1515  * When a regset has a writeback hook, we call it on each thread before
1516  * dumping user memory.  On register window machines, this makes sure the
1517  * user memory backing the register data is up to date before we read it.
1518  */
1519 static void do_thread_regset_writeback(struct task_struct *task,
1520                                        const struct user_regset *regset)
1521 {
1522         if (regset->writeback)
1523                 regset->writeback(task, regset, 1);
1524 }
1525
/*
 * Default accessors for struct elf_prstatus, used by
 * fill_thread_core_info().  Each one is defined only if nothing earlier in
 * the build has already provided its own version.
 */

/* Size in bytes of the register block handed to the regset 0 ->get(). */
1526 #ifndef PR_REG_SIZE
1527 #define PR_REG_SIZE(S) sizeof(S)
1528 #endif
1529
/* Size in bytes of the prstatus payload stored in the NT_PRSTATUS note. */
1530 #ifndef PRSTATUS_SIZE
1531 #define PRSTATUS_SIZE(S) sizeof(S)
1532 #endif
1533
/* Address of the register block inside the prstatus that S points to. */
1534 #ifndef PR_REG_PTR
1535 #define PR_REG_PTR(S) (&((S)->pr_reg))
1536 #endif
1537
/* Store V in the pr_fpvalid field of the prstatus that S points to. */
1538 #ifndef SET_PR_FPVALID
1539 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1540 #endif
1541
1542 static int fill_thread_core_info(struct elf_thread_core_info *t,
1543                                  const struct user_regset_view *view,
1544                                  long signr, size_t *total)
1545 {
1546         unsigned int i;
1547
1548         /*
1549          * NT_PRSTATUS is the one special case, because the regset data
1550          * goes into the pr_reg field inside the note contents, rather
1551          * than being the whole note contents.  We fill the reset in here.
1552          * We assume that regset 0 is NT_PRSTATUS.
1553          */
1554         fill_prstatus(&t->prstatus, t->task, signr);
1555         (void) view->regsets[0].get(t->task, &view->regsets[0],
1556                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1557                                     PR_REG_PTR(&t->prstatus), NULL);
1558
1559         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1560                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1561         *total += notesize(&t->notes[0]);
1562
1563         do_thread_regset_writeback(t->task, &view->regsets[0]);
1564
1565         /*
1566          * Each other regset might generate a note too.  For each regset
1567          * that has no core_note_type or is inactive, we leave t->notes[i]
1568          * all zero and we'll know to skip writing it later.
1569          */
1570         for (i = 1; i < view->n; ++i) {
1571                 const struct user_regset *regset = &view->regsets[i];
1572                 do_thread_regset_writeback(t->task, regset);
1573                 if (regset->core_note_type && regset->get &&
1574                     (!regset->active || regset->active(t->task, regset))) {
1575                         int ret;
1576                         size_t size = regset->n * regset->size;
1577                         void *data = kmalloc(size, GFP_KERNEL);
1578                         if (unlikely(!data))
1579                                 return 0;
1580                         ret = regset->get(t->task, regset,
1581                                           0, size, data, NULL);
1582                         if (unlikely(ret))
1583                                 kfree(data);
1584                         else {
1585                                 if (regset->core_note_type != NT_PRFPREG)
1586                                         fill_note(&t->notes[i], "LINUX",
1587                                                   regset->core_note_type,
1588                                                   size, data);
1589                                 else {
1590                                         SET_PR_FPVALID(&t->prstatus, 1);
1591                                         fill_note(&t->notes[i], "CORE",
1592                                                   NT_PRFPREG, size, data);
1593                                 }
1594                                 *total += notesize(&t->notes[i]);
1595                         }
1596                 }
1597         }
1598
1599         return 1;
1600 }
1601
1602 static int fill_note_info(struct elfhdr *elf, int phdrs,
1603                           struct elf_note_info *info,
1604                           siginfo_t *siginfo, struct pt_regs *regs)
1605 {
1606         struct task_struct *dump_task = current;
1607         const struct user_regset_view *view = task_user_regset_view(dump_task);
1608         struct elf_thread_core_info *t;
1609         struct elf_prpsinfo *psinfo;
1610         struct core_thread *ct;
1611         unsigned int i;
1612
1613         info->size = 0;
1614         info->thread = NULL;
1615
1616         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1617         if (psinfo == NULL) {
1618                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1619                 return 0;
1620         }
1621
1622         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1623
1624         /*
1625          * Figure out how many notes we're going to need for each thread.
1626          */
1627         info->thread_notes = 0;
1628         for (i = 0; i < view->n; ++i)
1629                 if (view->regsets[i].core_note_type != 0)
1630                         ++info->thread_notes;
1631
1632         /*
1633          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1634          * since it is our one special case.
1635          */
1636         if (unlikely(info->thread_notes == 0) ||
1637             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1638                 WARN_ON(1);
1639                 return 0;
1640         }
1641
1642         /*
1643          * Initialize the ELF file header.
1644          */
1645         fill_elf_header(elf, phdrs,
1646                         view->e_machine, view->e_flags);
1647
1648         /*
1649          * Allocate a structure for each thread.
1650          */
1651         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1652                 t = kzalloc(offsetof(struct elf_thread_core_info,
1653                                      notes[info->thread_notes]),
1654                             GFP_KERNEL);
1655                 if (unlikely(!t))
1656                         return 0;
1657
1658                 t->task = ct->task;
1659                 if (ct->task == dump_task || !info->thread) {
1660                         t->next = info->thread;
1661                         info->thread = t;
1662                 } else {
1663                         /*
1664                          * Make sure to keep the original task at
1665                          * the head of the list.
1666                          */
1667                         t->next = info->thread->next;
1668                         info->thread->next = t;
1669                 }
1670         }
1671
1672         /*
1673          * Now fill in each thread's information.
1674          */
1675         for (t = info->thread; t != NULL; t = t->next)
1676                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1677                         return 0;
1678
1679         /*
1680          * Fill in the two process-wide notes.
1681          */
1682         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1683         info->size += notesize(&info->psinfo);
1684
1685         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1686         info->size += notesize(&info->signote);
1687
1688         fill_auxv_note(&info->auxv, current->mm);
1689         info->size += notesize(&info->auxv);
1690
1691         fill_files_note(&info->files);
1692         info->size += notesize(&info->files);
1693
1694         return 1;
1695 }
1696
1697 static size_t get_note_info_size(struct elf_note_info *info)
1698 {
1699         return info->size;
1700 }
1701
1702 /*
1703  * Write all the notes for each thread.  When writing the first thread, the
1704  * process-wide notes are interleaved after the first thread-specific note.
1705  */
1706 static int write_note_info(struct elf_note_info *info,
1707                            struct file *file, loff_t *foffset)
1708 {
1709         bool first = 1;
1710         struct elf_thread_core_info *t = info->thread;
1711
1712         do {
1713                 int i;
1714
1715                 if (!writenote(&t->notes[0], file, foffset))
1716                         return 0;
1717
1718                 if (first && !writenote(&info->psinfo, file, foffset))
1719                         return 0;
1720                 if (first && !writenote(&info->signote, file, foffset))
1721                         return 0;
1722                 if (first && !writenote(&info->auxv, file, foffset))
1723                         return 0;
1724                 if (first && !writenote(&info->files, file, foffset))
1725                         return 0;
1726
1727                 for (i = 1; i < info->thread_notes; ++i)
1728                         if (t->notes[i].data &&
1729                             !writenote(&t->notes[i], file, foffset))
1730                                 return 0;
1731
1732                 first = 0;
1733                 t = t->next;
1734         } while (t);
1735
1736         return 1;
1737 }
1738
1739 static void free_note_info(struct elf_note_info *info)
1740 {
1741         struct elf_thread_core_info *threads = info->thread;
1742         while (threads) {
1743                 unsigned int i;
1744                 struct elf_thread_core_info *t = threads;
1745                 threads = t->next;
1746                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1747                 for (i = 1; i < info->thread_notes; ++i)
1748                         kfree(t->notes[i].data);
1749                 kfree(t);
1750         }
1751         kfree(info->psinfo.data);
1752         vfree(info->files.data);
1753 }
1754
1755 #else
1756
1757 /* Here is the structure in which status of each thread is captured. */
1758 struct elf_thread_status
1759 {
1760         struct list_head list;
1761         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1762         elf_fpregset_t fpu;             /* NT_PRFPREG */
1763         struct task_struct *thread;
1764 #ifdef ELF_CORE_COPY_XFPREGS
1765         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1766 #endif
1767         struct memelfnote notes[3];
1768         int num_notes;
1769 };
1770
1771 /*
1772  * In order to add the specific thread information for the elf file format,
1773  * we need to keep a linked list of every threads pr_status and then create
1774  * a single section for them in the final core file.
1775  */
1776 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1777 {
1778         int sz = 0;
1779         struct task_struct *p = t->thread;
1780         t->num_notes = 0;
1781
1782         fill_prstatus(&t->prstatus, p, signr);
1783         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1784         
1785         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1786                   &(t->prstatus));
1787         t->num_notes++;
1788         sz += notesize(&t->notes[0]);
1789
1790         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1791                                                                 &t->fpu))) {
1792                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1793                           &(t->fpu));
1794                 t->num_notes++;
1795                 sz += notesize(&t->notes[1]);
1796         }
1797
1798 #ifdef ELF_CORE_COPY_XFPREGS
1799         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1800                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1801                           sizeof(t->xfpu), &t->xfpu);
1802                 t->num_notes++;
1803                 sz += notesize(&t->notes[2]);
1804         }
1805 #endif  
1806         return sz;
1807 }
1808
1809 struct elf_note_info {
1810         struct memelfnote *notes;
1811         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1812         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1813         struct list_head thread_list;
1814         elf_fpregset_t *fpu;
1815 #ifdef ELF_CORE_COPY_XFPREGS
1816         elf_fpxregset_t *xfpu;
1817 #endif
1818         user_siginfo_t csigdata;
1819         int thread_status_size;
1820         int numnote;
1821 };
1822
1823 static int elf_note_info_init(struct elf_note_info *info)
1824 {
1825         memset(info, 0, sizeof(*info));
1826         INIT_LIST_HEAD(&info->thread_list);
1827
1828         /* Allocate space for ELF notes */
1829         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1830         if (!info->notes)
1831                 return 0;
1832         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1833         if (!info->psinfo)
1834                 return 0;
1835         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1836         if (!info->prstatus)
1837                 return 0;
1838         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1839         if (!info->fpu)
1840                 return 0;
1841 #ifdef ELF_CORE_COPY_XFPREGS
1842         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1843         if (!info->xfpu)
1844                 return 0;
1845 #endif
1846         return 1;
1847 }
1848
1849 static int fill_note_info(struct elfhdr *elf, int phdrs,
1850                           struct elf_note_info *info,
1851                           siginfo_t *siginfo, struct pt_regs *regs)
1852 {
1853         struct list_head *t;
1854
1855         if (!elf_note_info_init(info))
1856                 return 0;
1857
1858         if (siginfo->si_signo) {
1859                 struct core_thread *ct;
1860                 struct elf_thread_status *ets;
1861
1862                 for (ct = current->mm->core_state->dumper.next;
1863                                                 ct; ct = ct->next) {
1864                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1865                         if (!ets)
1866                                 return 0;
1867
1868                         ets->thread = ct->task;
1869                         list_add(&ets->list, &info->thread_list);
1870                 }
1871
1872                 list_for_each(t, &info->thread_list) {
1873                         int sz;
1874
1875                         ets = list_entry(t, struct elf_thread_status, list);
1876                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1877                         info->thread_status_size += sz;
1878                 }
1879         }
1880         /* now collect the dump for the current */
1881         memset(info->prstatus, 0, sizeof(*info->prstatus));
1882         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1883         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1884
1885         /* Set up header */
1886         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1887
1888         /*
1889          * Set up the notes in similar form to SVR4 core dumps made
1890          * with info from their /proc.
1891          */
1892
1893         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1894                   sizeof(*info->prstatus), info->prstatus);
1895         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1896         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1897                   sizeof(*info->psinfo), info->psinfo);
1898
1899         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1900         fill_auxv_note(info->notes + 3, current->mm);
1901         fill_files_note(info->notes + 4);
1902
1903         info->numnote = 5;
1904
1905         /* Try to dump the FPU. */
1906         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1907                                                                info->fpu);
1908         if (info->prstatus->pr_fpvalid)
1909                 fill_note(info->notes + info->numnote++,
1910                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1911 #ifdef ELF_CORE_COPY_XFPREGS
1912         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1913                 fill_note(info->notes + info->numnote++,
1914                           "LINUX", ELF_CORE_XFPREG_TYPE,
1915                           sizeof(*info->xfpu), info->xfpu);
1916 #endif
1917
1918         return 1;
1919 }
1920
1921 static size_t get_note_info_size(struct elf_note_info *info)
1922 {
1923         int sz = 0;
1924         int i;
1925
1926         for (i = 0; i < info->numnote; i++)
1927                 sz += notesize(info->notes + i);
1928
1929         sz += info->thread_status_size;
1930
1931         return sz;
1932 }
1933
1934 static int write_note_info(struct elf_note_info *info,
1935                            struct file *file, loff_t *foffset)
1936 {
1937         int i;
1938         struct list_head *t;
1939
1940         for (i = 0; i < info->numnote; i++)
1941                 if (!writenote(info->notes + i, file, foffset))
1942                         return 0;
1943
1944         /* write out the thread status notes section */
1945         list_for_each(t, &info->thread_list) {
1946                 struct elf_thread_status *tmp =
1947                                 list_entry(t, struct elf_thread_status, list);
1948
1949                 for (i = 0; i < tmp->num_notes; i++)
1950                         if (!writenote(&tmp->notes[i], file, foffset))
1951                                 return 0;
1952         }
1953
1954         return 1;
1955 }
1956
1957 static void free_note_info(struct elf_note_info *info)
1958 {
1959         while (!list_empty(&info->thread_list)) {
1960                 struct list_head *tmp = info->thread_list.next;
1961                 list_del(tmp);
1962                 kfree(list_entry(tmp, struct elf_thread_status, list));
1963         }
1964
1965         /* Free data allocated by fill_files_note(): */
1966         vfree(info->notes[4].data);
1967
1968         kfree(info->prstatus);
1969         kfree(info->psinfo);
1970         kfree(info->notes);
1971         kfree(info->fpu);
1972 #ifdef ELF_CORE_COPY_XFPREGS
1973         kfree(info->xfpu);
1974 #endif
1975 }
1976
1977 #endif
1978
1979 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1980                                         struct vm_area_struct *gate_vma)
1981 {
1982         struct vm_area_struct *ret = tsk->mm->mmap;
1983
1984         if (ret)
1985                 return ret;
1986         return gate_vma;
1987 }
1988 /*
1989  * Helper function for iterating across a vma list.  It ensures that the caller
1990  * will visit `gate_vma' prior to terminating the search.
1991  */
1992 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1993                                         struct vm_area_struct *gate_vma)
1994 {
1995         struct vm_area_struct *ret;
1996
1997         ret = this_vma->vm_next;
1998         if (ret)
1999                 return ret;
2000         if (this_vma == gate_vma)
2001                 return NULL;
2002         return gate_vma;
2003 }
2004
2005 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2006                              elf_addr_t e_shoff, int segs)
2007 {
2008         elf->e_shoff = e_shoff;
2009         elf->e_shentsize = sizeof(*shdr4extnum);
2010         elf->e_shnum = 1;
2011         elf->e_shstrndx = SHN_UNDEF;
2012
2013         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2014
2015         shdr4extnum->sh_type = SHT_NULL;
2016         shdr4extnum->sh_size = elf->e_shnum;
2017         shdr4extnum->sh_link = elf->e_shstrndx;
2018         shdr4extnum->sh_info = segs;
2019 }
2020
2021 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2022                                      unsigned long mm_flags)
2023 {
2024         struct vm_area_struct *vma;
2025         size_t size = 0;
2026
2027         for (vma = first_vma(current, gate_vma); vma != NULL;
2028              vma = next_vma(vma, gate_vma))
2029                 size += vma_dump_size(vma, mm_flags);
2030         return size;
2031 }
2032
2033 /*
2034  * Actual dumper
2035  *
2036  * This is a two-pass process; first we find the offsets of the bits,
2037  * and then they are actually written out.  If we run out of core limit
2038  * we just truncate.
2039  */
2040 static int elf_core_dump(struct coredump_params *cprm)
2041 {
2042         int has_dumped = 0;
2043         mm_segment_t fs;
2044         int segs;
2045         size_t size = 0;
2046         struct vm_area_struct *vma, *gate_vma;
2047         struct elfhdr *elf = NULL;
2048         loff_t offset = 0, dataoff, foffset;
2049         struct elf_note_info info;
2050         struct elf_phdr *phdr4note = NULL;
2051         struct elf_shdr *shdr4extnum = NULL;
2052         Elf_Half e_phnum;
2053         elf_addr_t e_shoff;
2054
2055         /*
2056          * We no longer stop all VM operations.
2057          * 
2058          * This is because those proceses that could possibly change map_count
2059          * or the mmap / vma pages are now blocked in do_exit on current
2060          * finishing this core dump.
2061          *
2062          * Only ptrace can touch these memory addresses, but it doesn't change
2063          * the map_count or the pages allocated. So no possibility of crashing
2064          * exists while dumping the mm->vm_next areas to the core file.
2065          */
2066   
2067         /* alloc memory for large data structures: too large to be on stack */
2068         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2069         if (!elf)
2070                 goto out;
2071         /*
2072          * The number of segs are recored into ELF header as 16bit value.
2073          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2074          */
2075         segs = current->mm->map_count;
2076         segs += elf_core_extra_phdrs();
2077
2078         gate_vma = get_gate_vma(current->mm);
2079         if (gate_vma != NULL)
2080                 segs++;
2081
2082         /* for notes section */
2083         segs++;
2084
2085         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2086          * this, kernel supports extended numbering. Have a look at
2087          * include/linux/elf.h for further information. */
2088         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2089
2090         /*
2091          * Collect all the non-memory information about the process for the
2092          * notes.  This also sets up the file header.
2093          */
2094         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2095                 goto cleanup;
2096
2097         has_dumped = 1;
2098
2099         fs = get_fs();
2100         set_fs(KERNEL_DS);
2101
2102         offset += sizeof(*elf);                         /* Elf header */
2103         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2104         foffset = offset;
2105
2106         /* Write notes phdr entry */
2107         {
2108                 size_t sz = get_note_info_size(&info);
2109
2110                 sz += elf_coredump_extra_notes_size();
2111
2112                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2113                 if (!phdr4note)
2114                         goto end_coredump;
2115
2116                 fill_elf_note_phdr(phdr4note, sz, offset);
2117                 offset += sz;
2118         }
2119
2120         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2121
2122         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2123         offset += elf_core_extra_data_size();
2124         e_shoff = offset;
2125
2126         if (e_phnum == PN_XNUM) {
2127                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2128                 if (!shdr4extnum)
2129                         goto end_coredump;
2130                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2131         }
2132
2133         offset = dataoff;
2134
2135         size += sizeof(*elf);
2136         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2137                 goto end_coredump;
2138
2139         size += sizeof(*phdr4note);
2140         if (size > cprm->limit
2141             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2142                 goto end_coredump;
2143
2144         /* Write program headers for segments dump */
2145         for (vma = first_vma(current, gate_vma); vma != NULL;
2146                         vma = next_vma(vma, gate_vma)) {
2147                 struct elf_phdr phdr;
2148
2149                 phdr.p_type = PT_LOAD;
2150                 phdr.p_offset = offset;
2151                 phdr.p_vaddr = vma->vm_start;
2152                 phdr.p_paddr = 0;
2153                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2154                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2155                 offset += phdr.p_filesz;
2156                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2157                 if (vma->vm_flags & VM_WRITE)
2158                         phdr.p_flags |= PF_W;
2159                 if (vma->vm_flags & VM_EXEC)
2160                         phdr.p_flags |= PF_X;
2161                 phdr.p_align = ELF_EXEC_PAGESIZE;
2162
2163                 size += sizeof(phdr);
2164                 if (size > cprm->limit
2165                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2166                         goto end_coredump;
2167         }
2168
2169         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2170                 goto end_coredump;
2171
2172         /* write out the notes section */
2173         if (!write_note_info(&info, cprm->file, &foffset))
2174                 goto end_coredump;
2175
2176         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2177                 goto end_coredump;
2178
2179         /* Align to page */
2180         if (!dump_seek(cprm->file, dataoff - foffset))
2181                 goto end_coredump;
2182
2183         for (vma = first_vma(current, gate_vma); vma != NULL;
2184                         vma = next_vma(vma, gate_vma)) {
2185                 unsigned long addr;
2186                 unsigned long end;
2187
2188                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2189
2190                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2191                         struct page *page;
2192                         int stop;
2193
2194                         page = get_dump_page(addr);
2195                         if (page) {
2196                                 void *kaddr = kmap(page);
2197                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2198                                         !dump_write(cprm->file, kaddr,
2199                                                     PAGE_SIZE);
2200                                 kunmap(page);
2201                                 page_cache_release(page);
2202                         } else
2203                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2204                         if (stop)
2205                                 goto end_coredump;
2206                 }
2207         }
2208
2209         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2210                 goto end_coredump;
2211
2212         if (e_phnum == PN_XNUM) {
2213                 size += sizeof(*shdr4extnum);
2214                 if (size > cprm->limit
2215                     || !dump_write(cprm->file, shdr4extnum,
2216                                    sizeof(*shdr4extnum)))
2217                         goto end_coredump;
2218         }
2219
2220 end_coredump:
2221         set_fs(fs);
2222
2223 cleanup:
2224         free_note_info(&info);
2225         kfree(shdr4extnum);
2226         kfree(phdr4note);
2227         kfree(elf);
2228 out:
2229         return has_dumped;
2230 }
2231
2232 #endif          /* CONFIG_ELF_CORE */
2233
2234 static int __init init_elf_binfmt(void)
2235 {
2236         register_binfmt(&elf_format);
2237         return 0;
2238 }
2239
2240 static void __exit exit_elf_binfmt(void)
2241 {
2242         /* Remove the COFF and ELF loaders. */
2243         unregister_binfmt(&elf_format);
2244 }
2245
2246 core_initcall(init_elf_binfmt);
2247 module_exit(exit_elf_binfmt);
2248 MODULE_LICENSE("GPL");