Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
[platform/adaptation/renesas_rcar/renesas_kernel.git] / fs / coredump.c
1 #include <linux/slab.h>
2 #include <linux/file.h>
3 #include <linux/fdtable.h>
4 #include <linux/mm.h>
5 #include <linux/stat.h>
6 #include <linux/fcntl.h>
7 #include <linux/swap.h>
8 #include <linux/string.h>
9 #include <linux/init.h>
10 #include <linux/pagemap.h>
11 #include <linux/perf_event.h>
12 #include <linux/highmem.h>
13 #include <linux/spinlock.h>
14 #include <linux/key.h>
15 #include <linux/personality.h>
16 #include <linux/binfmts.h>
17 #include <linux/coredump.h>
18 #include <linux/utsname.h>
19 #include <linux/pid_namespace.h>
20 #include <linux/module.h>
21 #include <linux/namei.h>
22 #include <linux/mount.h>
23 #include <linux/security.h>
24 #include <linux/syscalls.h>
25 #include <linux/tsacct_kern.h>
26 #include <linux/cn_proc.h>
27 #include <linux/audit.h>
28 #include <linux/tracehook.h>
29 #include <linux/kmod.h>
30 #include <linux/fsnotify.h>
31 #include <linux/fs_struct.h>
32 #include <linux/pipe_fs_i.h>
33 #include <linux/oom.h>
34 #include <linux/compat.h>
35
36 #include <asm/uaccess.h>
37 #include <asm/mmu_context.h>
38 #include <asm/tlb.h>
39 #include <asm/exec.h>
40
41 #include <trace/events/task.h>
42 #include "internal.h"
43
44 #include <trace/events/sched.h>
45
46 int core_uses_pid;
47 unsigned int core_pipe_limit;
48 char core_pattern[CORENAME_MAX_SIZE] = "core";
49 static int core_name_size = CORENAME_MAX_SIZE;
50
/*
 * Growable buffer in which the expanded core file name (or the pipe helper
 * command line) is assembled.
 */
struct core_name {
	char *corename;	/* buffer, (re)allocated by expand_corename() */
	int used;	/* characters written so far, excluding the NUL */
	int size;	/* capacity of corename as reported by ksize() */
};
55
56 /* The maximal length of core_pattern is also specified in sysctl.c */
57
58 static int expand_corename(struct core_name *cn, int size)
59 {
60         char *corename = krealloc(cn->corename, size, GFP_KERNEL);
61
62         if (!corename)
63                 return -ENOMEM;
64
65         if (size > core_name_size) /* racy but harmless */
66                 core_name_size = size;
67
68         cn->size = ksize(corename);
69         cn->corename = corename;
70         return 0;
71 }
72
73 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg)
74 {
75         int free, need;
76
77 again:
78         free = cn->size - cn->used;
79         need = vsnprintf(cn->corename + cn->used, free, fmt, arg);
80         if (need < free) {
81                 cn->used += need;
82                 return 0;
83         }
84
85         if (!expand_corename(cn, cn->size + need - free + 1))
86                 goto again;
87
88         return -ENOMEM;
89 }
90
/*
 * printf-style front end for cn_vprintf(): appends the formatted text to
 * @cn and returns cn_vprintf()'s result (0 or a negative errno).
 */
static int cn_printf(struct core_name *cn, const char *fmt, ...)
{
	va_list ap;
	int err;

	va_start(ap, fmt);
	err = cn_vprintf(cn, fmt, ap);
	va_end(ap);

	return err;
}
102
103 static int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
104 {
105         int cur = cn->used;
106         va_list arg;
107         int ret;
108
109         va_start(arg, fmt);
110         ret = cn_vprintf(cn, fmt, arg);
111         va_end(arg);
112
113         for (; cur < cn->used; ++cur) {
114                 if (cn->corename[cur] == '/')
115                         cn->corename[cur] = '!';
116         }
117         return ret;
118 }
119
120 static int cn_print_exe_file(struct core_name *cn)
121 {
122         struct file *exe_file;
123         char *pathbuf, *path;
124         int ret;
125
126         exe_file = get_mm_exe_file(current->mm);
127         if (!exe_file)
128                 return cn_esc_printf(cn, "%s (path unknown)", current->comm);
129
130         pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
131         if (!pathbuf) {
132                 ret = -ENOMEM;
133                 goto put_exe_file;
134         }
135
136         path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
137         if (IS_ERR(path)) {
138                 ret = PTR_ERR(path);
139                 goto free_buf;
140         }
141
142         ret = cn_esc_printf(cn, "%s", path);
143
144 free_buf:
145         kfree(pathbuf);
146 put_exe_file:
147         fput(exe_file);
148         return ret;
149 }
150
151 /* format_corename will inspect the pattern parameter, and output a
152  * name into corename, which must have space for at least
153  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
154  */
155 static int format_corename(struct core_name *cn, struct coredump_params *cprm)
156 {
157         const struct cred *cred = current_cred();
158         const char *pat_ptr = core_pattern;
159         int ispipe = (*pat_ptr == '|');
160         int pid_in_pattern = 0;
161         int err = 0;
162
163         cn->used = 0;
164         cn->corename = NULL;
165         if (expand_corename(cn, core_name_size))
166                 return -ENOMEM;
167         cn->corename[0] = '\0';
168
169         if (ispipe)
170                 ++pat_ptr;
171
172         /* Repeat as long as we have more pattern to process and more output
173            space */
174         while (*pat_ptr) {
175                 if (*pat_ptr != '%') {
176                         err = cn_printf(cn, "%c", *pat_ptr++);
177                 } else {
178                         switch (*++pat_ptr) {
179                         /* single % at the end, drop that */
180                         case 0:
181                                 goto out;
182                         /* Double percent, output one percent */
183                         case '%':
184                                 err = cn_printf(cn, "%c", '%');
185                                 break;
186                         /* pid */
187                         case 'p':
188                                 pid_in_pattern = 1;
189                                 err = cn_printf(cn, "%d",
190                                               task_tgid_vnr(current));
191                                 break;
192                         /* global pid */
193                         case 'P':
194                                 err = cn_printf(cn, "%d",
195                                               task_tgid_nr(current));
196                                 break;
197                         /* uid */
198                         case 'u':
199                                 err = cn_printf(cn, "%d", cred->uid);
200                                 break;
201                         /* gid */
202                         case 'g':
203                                 err = cn_printf(cn, "%d", cred->gid);
204                                 break;
205                         case 'd':
206                                 err = cn_printf(cn, "%d",
207                                         __get_dumpable(cprm->mm_flags));
208                                 break;
209                         /* signal that caused the coredump */
210                         case 's':
211                                 err = cn_printf(cn, "%ld", cprm->siginfo->si_signo);
212                                 break;
213                         /* UNIX time of coredump */
214                         case 't': {
215                                 struct timeval tv;
216                                 do_gettimeofday(&tv);
217                                 err = cn_printf(cn, "%lu", tv.tv_sec);
218                                 break;
219                         }
220                         /* hostname */
221                         case 'h':
222                                 down_read(&uts_sem);
223                                 err = cn_esc_printf(cn, "%s",
224                                               utsname()->nodename);
225                                 up_read(&uts_sem);
226                                 break;
227                         /* executable */
228                         case 'e':
229                                 err = cn_esc_printf(cn, "%s", current->comm);
230                                 break;
231                         case 'E':
232                                 err = cn_print_exe_file(cn);
233                                 break;
234                         /* core limit size */
235                         case 'c':
236                                 err = cn_printf(cn, "%lu",
237                                               rlimit(RLIMIT_CORE));
238                                 break;
239                         default:
240                                 break;
241                         }
242                         ++pat_ptr;
243                 }
244
245                 if (err)
246                         return err;
247         }
248
249 out:
250         /* Backward compatibility with core_uses_pid:
251          *
252          * If core_pattern does not include a %p (as is the default)
253          * and core_uses_pid is set, then .%pid will be appended to
254          * the filename. Do not do this for piped commands. */
255         if (!ispipe && !pid_in_pattern && core_uses_pid) {
256                 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
257                 if (err)
258                         return err;
259         }
260         return ispipe;
261 }
262
263 static int zap_process(struct task_struct *start, int exit_code)
264 {
265         struct task_struct *t;
266         int nr = 0;
267
268         start->signal->group_exit_code = exit_code;
269         start->signal->group_stop_count = 0;
270
271         t = start;
272         do {
273                 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
274                 if (t != current && t->mm) {
275                         sigaddset(&t->pending.signal, SIGKILL);
276                         signal_wake_up(t, 1);
277                         nr++;
278                 }
279         } while_each_thread(start, t);
280
281         return nr;
282 }
283
284 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
285                         struct core_state *core_state, int exit_code)
286 {
287         struct task_struct *g, *p;
288         unsigned long flags;
289         int nr = -EAGAIN;
290
291         spin_lock_irq(&tsk->sighand->siglock);
292         if (!signal_group_exit(tsk->signal)) {
293                 mm->core_state = core_state;
294                 nr = zap_process(tsk, exit_code);
295                 tsk->signal->group_exit_task = tsk;
296                 /* ignore all signals except SIGKILL, see prepare_signal() */
297                 tsk->signal->flags = SIGNAL_GROUP_COREDUMP;
298                 clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
299         }
300         spin_unlock_irq(&tsk->sighand->siglock);
301         if (unlikely(nr < 0))
302                 return nr;
303
304         tsk->flags = PF_DUMPCORE;
305         if (atomic_read(&mm->mm_users) == nr + 1)
306                 goto done;
307         /*
308          * We should find and kill all tasks which use this mm, and we should
309          * count them correctly into ->nr_threads. We don't take tasklist
310          * lock, but this is safe wrt:
311          *
312          * fork:
313          *      None of sub-threads can fork after zap_process(leader). All
314          *      processes which were created before this point should be
315          *      visible to zap_threads() because copy_process() adds the new
316          *      process to the tail of init_task.tasks list, and lock/unlock
317          *      of ->siglock provides a memory barrier.
318          *
319          * do_exit:
320          *      The caller holds mm->mmap_sem. This means that the task which
321          *      uses this mm can't pass exit_mm(), so it can't exit or clear
322          *      its ->mm.
323          *
324          * de_thread:
325          *      It does list_replace_rcu(&leader->tasks, &current->tasks),
326          *      we must see either old or new leader, this does not matter.
327          *      However, it can change p->sighand, so lock_task_sighand(p)
328          *      must be used. Since p->mm != NULL and we hold ->mmap_sem
329          *      it can't fail.
330          *
331          *      Note also that "g" can be the old leader with ->mm == NULL
332          *      and already unhashed and thus removed from ->thread_group.
333          *      This is OK, __unhash_process()->list_del_rcu() does not
334          *      clear the ->next pointer, we will find the new leader via
335          *      next_thread().
336          */
337         rcu_read_lock();
338         for_each_process(g) {
339                 if (g == tsk->group_leader)
340                         continue;
341                 if (g->flags & PF_KTHREAD)
342                         continue;
343                 p = g;
344                 do {
345                         if (p->mm) {
346                                 if (unlikely(p->mm == mm)) {
347                                         lock_task_sighand(p, &flags);
348                                         nr += zap_process(p, exit_code);
349                                         p->signal->flags = SIGNAL_GROUP_EXIT;
350                                         unlock_task_sighand(p, &flags);
351                                 }
352                                 break;
353                         }
354                 } while_each_thread(g, p);
355         }
356         rcu_read_unlock();
357 done:
358         atomic_set(&core_state->nr_threads, nr);
359         return nr;
360 }
361
362 static int coredump_wait(int exit_code, struct core_state *core_state)
363 {
364         struct task_struct *tsk = current;
365         struct mm_struct *mm = tsk->mm;
366         int core_waiters = -EBUSY;
367
368         init_completion(&core_state->startup);
369         core_state->dumper.task = tsk;
370         core_state->dumper.next = NULL;
371
372         down_write(&mm->mmap_sem);
373         if (!mm->core_state)
374                 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
375         up_write(&mm->mmap_sem);
376
377         if (core_waiters > 0) {
378                 struct core_thread *ptr;
379
380                 wait_for_completion(&core_state->startup);
381                 /*
382                  * Wait for all the threads to become inactive, so that
383                  * all the thread context (extended register state, like
384                  * fpu etc) gets copied to the memory.
385                  */
386                 ptr = core_state->dumper.next;
387                 while (ptr != NULL) {
388                         wait_task_inactive(ptr->task, 0);
389                         ptr = ptr->next;
390                 }
391         }
392
393         return core_waiters;
394 }
395
396 static void coredump_finish(struct mm_struct *mm, bool core_dumped)
397 {
398         struct core_thread *curr, *next;
399         struct task_struct *task;
400
401         spin_lock_irq(&current->sighand->siglock);
402         if (core_dumped && !__fatal_signal_pending(current))
403                 current->signal->group_exit_code |= 0x80;
404         current->signal->group_exit_task = NULL;
405         current->signal->flags = SIGNAL_GROUP_EXIT;
406         spin_unlock_irq(&current->sighand->siglock);
407
408         next = mm->core_state->dumper.next;
409         while ((curr = next) != NULL) {
410                 next = curr->next;
411                 task = curr->task;
412                 /*
413                  * see exit_mm(), curr->task must not see
414                  * ->task == NULL before we read ->next.
415                  */
416                 smp_mb();
417                 curr->task = NULL;
418                 wake_up_process(task);
419         }
420
421         mm->core_state = NULL;
422 }
423
424 static bool dump_interrupted(void)
425 {
426         /*
427          * SIGKILL or freezing() interrupt the coredumping. Perhaps we
428          * can do try_to_freeze() and check __fatal_signal_pending(),
429          * but then we need to teach dump_write() to restart and clear
430          * TIF_SIGPENDING.
431          */
432         return signal_pending(current);
433 }
434
435 static void wait_for_dump_helpers(struct file *file)
436 {
437         struct pipe_inode_info *pipe = file->private_data;
438
439         pipe_lock(pipe);
440         pipe->readers++;
441         pipe->writers--;
442         wake_up_interruptible_sync(&pipe->wait);
443         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
444         pipe_unlock(pipe);
445
446         /*
447          * We actually want wait_event_freezable() but then we need
448          * to clear TIF_SIGPENDING and improve dump_interrupted().
449          */
450         wait_event_interruptible(pipe->wait, pipe->readers == 1);
451
452         pipe_lock(pipe);
453         pipe->readers--;
454         pipe->writers++;
455         pipe_unlock(pipe);
456 }
457
458 /*
459  * umh_pipe_setup
460  * helper function to customize the process used
461  * to collect the core in userspace.  Specifically
462  * it sets up a pipe and installs it as fd 0 (stdin)
463  * for the process.  Returns 0 on success, or
464  * PTR_ERR on failure.
465  * Note that it also sets the core limit to 1.  This
466  * is a special value that we use to trap recursive
467  * core dumps
468  */
469 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
470 {
471         struct file *files[2];
472         struct coredump_params *cp = (struct coredump_params *)info->data;
473         int err = create_pipe_files(files, 0);
474         if (err)
475                 return err;
476
477         cp->file = files[1];
478
479         err = replace_fd(0, files[0], 0);
480         fput(files[0]);
481         /* and disallow core files too */
482         current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
483
484         return err;
485 }
486
487 void do_coredump(const siginfo_t *siginfo)
488 {
489         struct core_state core_state;
490         struct core_name cn;
491         struct mm_struct *mm = current->mm;
492         struct linux_binfmt * binfmt;
493         const struct cred *old_cred;
494         struct cred *cred;
495         int retval = 0;
496         int flag = 0;
497         int ispipe;
498         struct files_struct *displaced;
499         bool need_nonrelative = false;
500         bool core_dumped = false;
501         static atomic_t core_dump_count = ATOMIC_INIT(0);
502         struct coredump_params cprm = {
503                 .siginfo = siginfo,
504                 .regs = signal_pt_regs(),
505                 .limit = rlimit(RLIMIT_CORE),
506                 /*
507                  * We must use the same mm->flags while dumping core to avoid
508                  * inconsistency of bit flags, since this flag is not protected
509                  * by any locks.
510                  */
511                 .mm_flags = mm->flags,
512         };
513
514         audit_core_dumps(siginfo->si_signo);
515
516         binfmt = mm->binfmt;
517         if (!binfmt || !binfmt->core_dump)
518                 goto fail;
519         if (!__get_dumpable(cprm.mm_flags))
520                 goto fail;
521
522         cred = prepare_creds();
523         if (!cred)
524                 goto fail;
525         /*
526          * We cannot trust fsuid as being the "true" uid of the process
527          * nor do we know its entire history. We only know it was tainted
528          * so we dump it as root in mode 2, and only into a controlled
529          * environment (pipe handler or fully qualified path).
530          */
531         if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
532                 /* Setuid core dump mode */
533                 flag = O_EXCL;          /* Stop rewrite attacks */
534                 cred->fsuid = GLOBAL_ROOT_UID;  /* Dump root private */
535                 need_nonrelative = true;
536         }
537
538         retval = coredump_wait(siginfo->si_signo, &core_state);
539         if (retval < 0)
540                 goto fail_creds;
541
542         old_cred = override_creds(cred);
543
544         ispipe = format_corename(&cn, &cprm);
545
546         if (ispipe) {
547                 int dump_count;
548                 char **helper_argv;
549                 struct subprocess_info *sub_info;
550
551                 if (ispipe < 0) {
552                         printk(KERN_WARNING "format_corename failed\n");
553                         printk(KERN_WARNING "Aborting core\n");
554                         goto fail_unlock;
555                 }
556
557                 if (cprm.limit == 1) {
558                         /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
559                          *
560                          * Normally core limits are irrelevant to pipes, since
561                          * we're not writing to the file system, but we use
562                          * cprm.limit of 1 here as a speacial value, this is a
563                          * consistent way to catch recursive crashes.
564                          * We can still crash if the core_pattern binary sets
565                          * RLIM_CORE = !1, but it runs as root, and can do
566                          * lots of stupid things.
567                          *
568                          * Note that we use task_tgid_vnr here to grab the pid
569                          * of the process group leader.  That way we get the
570                          * right pid if a thread in a multi-threaded
571                          * core_pattern process dies.
572                          */
573                         printk(KERN_WARNING
574                                 "Process %d(%s) has RLIMIT_CORE set to 1\n",
575                                 task_tgid_vnr(current), current->comm);
576                         printk(KERN_WARNING "Aborting core\n");
577                         goto fail_unlock;
578                 }
579                 cprm.limit = RLIM_INFINITY;
580
581                 dump_count = atomic_inc_return(&core_dump_count);
582                 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
583                         printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
584                                task_tgid_vnr(current), current->comm);
585                         printk(KERN_WARNING "Skipping core dump\n");
586                         goto fail_dropcount;
587                 }
588
589                 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
590                 if (!helper_argv) {
591                         printk(KERN_WARNING "%s failed to allocate memory\n",
592                                __func__);
593                         goto fail_dropcount;
594                 }
595
596                 retval = -ENOMEM;
597                 sub_info = call_usermodehelper_setup(helper_argv[0],
598                                                 helper_argv, NULL, GFP_KERNEL,
599                                                 umh_pipe_setup, NULL, &cprm);
600                 if (sub_info)
601                         retval = call_usermodehelper_exec(sub_info,
602                                                           UMH_WAIT_EXEC);
603
604                 argv_free(helper_argv);
605                 if (retval) {
606                         printk(KERN_INFO "Core dump to |%s pipe failed\n",
607                                cn.corename);
608                         goto close_fail;
609                 }
610         } else {
611                 struct inode *inode;
612
613                 if (cprm.limit < binfmt->min_coredump)
614                         goto fail_unlock;
615
616                 if (need_nonrelative && cn.corename[0] != '/') {
617                         printk(KERN_WARNING "Pid %d(%s) can only dump core "\
618                                 "to fully qualified path!\n",
619                                 task_tgid_vnr(current), current->comm);
620                         printk(KERN_WARNING "Skipping core dump\n");
621                         goto fail_unlock;
622                 }
623
624                 cprm.file = filp_open(cn.corename,
625                                  O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
626                                  0600);
627                 if (IS_ERR(cprm.file))
628                         goto fail_unlock;
629
630                 inode = file_inode(cprm.file);
631                 if (inode->i_nlink > 1)
632                         goto close_fail;
633                 if (d_unhashed(cprm.file->f_path.dentry))
634                         goto close_fail;
635                 /*
636                  * AK: actually i see no reason to not allow this for named
637                  * pipes etc, but keep the previous behaviour for now.
638                  */
639                 if (!S_ISREG(inode->i_mode))
640                         goto close_fail;
641                 /*
642                  * Dont allow local users get cute and trick others to coredump
643                  * into their pre-created files.
644                  */
645                 if (!uid_eq(inode->i_uid, current_fsuid()))
646                         goto close_fail;
647                 if (!cprm.file->f_op->write)
648                         goto close_fail;
649                 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
650                         goto close_fail;
651         }
652
653         /* get us an unshared descriptor table; almost always a no-op */
654         retval = unshare_files(&displaced);
655         if (retval)
656                 goto close_fail;
657         if (displaced)
658                 put_files_struct(displaced);
659         if (!dump_interrupted()) {
660                 file_start_write(cprm.file);
661                 core_dumped = binfmt->core_dump(&cprm);
662                 file_end_write(cprm.file);
663         }
664         if (ispipe && core_pipe_limit)
665                 wait_for_dump_helpers(cprm.file);
666 close_fail:
667         if (cprm.file)
668                 filp_close(cprm.file, NULL);
669 fail_dropcount:
670         if (ispipe)
671                 atomic_dec(&core_dump_count);
672 fail_unlock:
673         kfree(cn.corename);
674         coredump_finish(mm, core_dumped);
675         revert_creds(old_cred);
676 fail_creds:
677         put_cred(cred);
678 fail:
679         return;
680 }
681
682 /*
683  * Core dumping helper functions.  These are the only things you should
684  * do on a core-file: use only these functions to write out all the
685  * necessary info.
686  */
687 int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
688 {
689         struct file *file = cprm->file;
690         loff_t pos = file->f_pos;
691         ssize_t n;
692         if (cprm->written + nr > cprm->limit)
693                 return 0;
694         while (nr) {
695                 if (dump_interrupted())
696                         return 0;
697                 n = __kernel_write(file, addr, nr, &pos);
698                 if (n <= 0)
699                         return 0;
700                 file->f_pos = pos;
701                 cprm->written += n;
702                 nr -= n;
703         }
704         return 1;
705 }
706 EXPORT_SYMBOL(dump_emit);
707
708 int dump_skip(struct coredump_params *cprm, size_t nr)
709 {
710         static char zeroes[PAGE_SIZE];
711         struct file *file = cprm->file;
712         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
713                 if (cprm->written + nr > cprm->limit)
714                         return 0;
715                 if (dump_interrupted() ||
716                     file->f_op->llseek(file, nr, SEEK_CUR) < 0)
717                         return 0;
718                 cprm->written += nr;
719                 return 1;
720         } else {
721                 while (nr > PAGE_SIZE) {
722                         if (!dump_emit(cprm, zeroes, PAGE_SIZE))
723                                 return 0;
724                         nr -= PAGE_SIZE;
725                 }
726                 return dump_emit(cprm, zeroes, nr);
727         }
728 }
729 EXPORT_SYMBOL(dump_skip);
730
731 int dump_align(struct coredump_params *cprm, int align)
732 {
733         unsigned mod = cprm->written & (align - 1);
734         if (align & (align - 1))
735                 return 0;
736         return mod ? dump_skip(cprm, align - mod) : 1;
737 }
738 EXPORT_SYMBOL(dump_align);