sysctl: Fix data-races in proc_dou8vec_minmax().
[platform/kernel/linux-rpi.git] / kernel / sysctl.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21
22 #include <linux/module.h>
23 #include <linux/aio.h>
24 #include <linux/mm.h>
25 #include <linux/swap.h>
26 #include <linux/slab.h>
27 #include <linux/sysctl.h>
28 #include <linux/bitmap.h>
29 #include <linux/signal.h>
30 #include <linux/panic.h>
31 #include <linux/printk.h>
32 #include <linux/proc_fs.h>
33 #include <linux/security.h>
34 #include <linux/ctype.h>
35 #include <linux/kmemleak.h>
36 #include <linux/fs.h>
37 #include <linux/init.h>
38 #include <linux/kernel.h>
39 #include <linux/kobject.h>
40 #include <linux/net.h>
41 #include <linux/sysrq.h>
42 #include <linux/highuid.h>
43 #include <linux/writeback.h>
44 #include <linux/ratelimit.h>
45 #include <linux/compaction.h>
46 #include <linux/hugetlb.h>
47 #include <linux/initrd.h>
48 #include <linux/key.h>
49 #include <linux/times.h>
50 #include <linux/limits.h>
51 #include <linux/dcache.h>
52 #include <linux/dnotify.h>
53 #include <linux/syscalls.h>
54 #include <linux/vmstat.h>
55 #include <linux/nfs_fs.h>
56 #include <linux/acpi.h>
57 #include <linux/reboot.h>
58 #include <linux/ftrace.h>
59 #include <linux/perf_event.h>
60 #include <linux/kprobes.h>
61 #include <linux/pipe_fs_i.h>
62 #include <linux/oom.h>
63 #include <linux/kmod.h>
64 #include <linux/capability.h>
65 #include <linux/binfmts.h>
66 #include <linux/sched/sysctl.h>
67 #include <linux/sched/coredump.h>
68 #include <linux/kexec.h>
69 #include <linux/bpf.h>
70 #include <linux/mount.h>
71 #include <linux/userfaultfd_k.h>
72 #include <linux/coredump.h>
73 #include <linux/latencytop.h>
74 #include <linux/pid.h>
75 #include <linux/delayacct.h>
76
77 #include "../lib/kstrtox.h"
78
79 #include <linux/uaccess.h>
80 #include <asm/processor.h>
81
82 #ifdef CONFIG_X86
83 #include <asm/nmi.h>
84 #include <asm/stacktrace.h>
85 #include <asm/io.h>
86 #endif
87 #ifdef CONFIG_SPARC
88 #include <asm/setup.h>
89 #endif
90 #ifdef CONFIG_BSD_PROCESS_ACCT
91 #include <linux/acct.h>
92 #endif
93 #ifdef CONFIG_RT_MUTEXES
94 #include <linux/rtmutex.h>
95 #endif
96 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
97 #include <linux/lockdep.h>
98 #endif
99 #ifdef CONFIG_CHR_DEV_SG
100 #include <scsi/sg.h>
101 #endif
102 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
103 #include <linux/stackleak.h>
104 #endif
105 #ifdef CONFIG_LOCKUP_DETECTOR
106 #include <linux/nmi.h>
107 #endif
108
109 #if defined(CONFIG_SYSCTL)
110
111 /* Constants used for minimum and  maximum */
112 #ifdef CONFIG_LOCKUP_DETECTOR
113 static int sixty = 60;
114 #endif
115
116 static int __maybe_unused neg_one = -1;
117 static int __maybe_unused two = 2;
118 static int __maybe_unused four = 4;
119 static unsigned long zero_ul;
120 static unsigned long one_ul = 1;
121 static unsigned long long_max = LONG_MAX;
122 static int one_hundred = 100;
123 static int two_hundred = 200;
124 static int one_thousand = 1000;
125 #ifdef CONFIG_PRINTK
126 static int ten_thousand = 10000;
127 #endif
128 #ifdef CONFIG_PERF_EVENTS
129 static int six_hundred_forty_kb = 640 * 1024;
130 #endif
131
132 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
133 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
134
135 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
136 static int maxolduid = 65535;
137 static int minolduid;
138
139 static int ngroups_max = NGROUPS_MAX;
140 static const int cap_last_cap = CAP_LAST_CAP;
141
142 /*
143  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
144  * and hung_task_check_interval_secs
145  */
146 #ifdef CONFIG_DETECT_HUNG_TASK
147 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
148 #endif
149
150 #ifdef CONFIG_INOTIFY_USER
151 #include <linux/inotify.h>
152 #endif
153 #ifdef CONFIG_FANOTIFY
154 #include <linux/fanotify.h>
155 #endif
156
157 #ifdef CONFIG_PROC_SYSCTL
158
/**
 * enum sysctl_writes_mode - how the file position affects sysctl writes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete
 *      value; repeated writes on one file descriptor overwrite the value
 *      no matter what the file position is, and a non-zero initial
 *      position produces no warning.
 * @SYSCTL_WRITES_WARN: like the legacy mode, except that a warning is
 *      issued when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric entries may only be written at file
 *      position 0, with the whole value inside the buffer handed to the
 *      write syscall. String entries honour the file position up to the
 *      maximum buffer length; data beyond that length is ignored, and
 *      successive writes append to the buffer.
 *
 * The selected mode decides, for each write through the proc interface,
 * how the current file position influences the update of the value.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY		= -1,
	SYSCTL_WRITES_WARN		= 0,
	SYSCTL_WRITES_STRICT		= 1,
};

/* Write mode currently in effect; strict by default. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
185 #endif /* CONFIG_PROC_SYSCTL */
186
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
/* Non-static: only defined when one of the mmap layout options is set. */
int sysctl_legacy_va_layout;
#endif

#ifdef CONFIG_COMPACTION
/* Lower and upper bound (0 and 1000) for the extfrag threshold. */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
196
197 #endif /* CONFIG_SYSCTL */
198
199 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
200 static int bpf_stats_handler(struct ctl_table *table, int write,
201                              void *buffer, size_t *lenp, loff_t *ppos)
202 {
203         struct static_key *key = (struct static_key *)table->data;
204         static int saved_val;
205         int val, ret;
206         struct ctl_table tmp = {
207                 .data   = &val,
208                 .maxlen = sizeof(val),
209                 .mode   = table->mode,
210                 .extra1 = SYSCTL_ZERO,
211                 .extra2 = SYSCTL_ONE,
212         };
213
214         if (write && !capable(CAP_SYS_ADMIN))
215                 return -EPERM;
216
217         mutex_lock(&bpf_stats_enabled_mutex);
218         val = saved_val;
219         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
220         if (write && !ret && val != saved_val) {
221                 if (val)
222                         static_key_slow_inc(key);
223                 else
224                         static_key_slow_dec(key);
225                 saved_val = val;
226         }
227         mutex_unlock(&bpf_stats_enabled_mutex);
228         return ret;
229 }
230
231 void __weak unpriv_ebpf_notify(int new_state)
232 {
233 }
234
235 static int bpf_unpriv_handler(struct ctl_table *table, int write,
236                               void *buffer, size_t *lenp, loff_t *ppos)
237 {
238         int ret, unpriv_enable = *(int *)table->data;
239         bool locked_state = unpriv_enable == 1;
240         struct ctl_table tmp = *table;
241
242         if (write && !capable(CAP_SYS_ADMIN))
243                 return -EPERM;
244
245         tmp.data = &unpriv_enable;
246         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
247         if (write && !ret) {
248                 if (locked_state && unpriv_enable != 1)
249                         return -EPERM;
250                 *(int *)table->data = unpriv_enable;
251         }
252
253         unpriv_ebpf_notify(unpriv_enable);
254
255         return ret;
256 }
257 #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
258
259 /*
260  * /proc/sys support
261  */
262
263 #ifdef CONFIG_PROC_SYSCTL
264
265 static int _proc_do_string(char *data, int maxlen, int write,
266                 char *buffer, size_t *lenp, loff_t *ppos)
267 {
268         size_t len;
269         char c, *p;
270
271         if (!data || !maxlen || !*lenp) {
272                 *lenp = 0;
273                 return 0;
274         }
275
276         if (write) {
277                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
278                         /* Only continue writes not past the end of buffer. */
279                         len = strlen(data);
280                         if (len > maxlen - 1)
281                                 len = maxlen - 1;
282
283                         if (*ppos > len)
284                                 return 0;
285                         len = *ppos;
286                 } else {
287                         /* Start writing from beginning of buffer. */
288                         len = 0;
289                 }
290
291                 *ppos += *lenp;
292                 p = buffer;
293                 while ((p - buffer) < *lenp && len < maxlen - 1) {
294                         c = *(p++);
295                         if (c == 0 || c == '\n')
296                                 break;
297                         data[len++] = c;
298                 }
299                 data[len] = 0;
300         } else {
301                 len = strlen(data);
302                 if (len > maxlen)
303                         len = maxlen;
304
305                 if (*ppos > len) {
306                         *lenp = 0;
307                         return 0;
308                 }
309
310                 data += *ppos;
311                 len  -= *ppos;
312
313                 if (len > *lenp)
314                         len = *lenp;
315                 if (len)
316                         memcpy(buffer, data, len);
317                 if (len < *lenp) {
318                         buffer[len] = '\n';
319                         len++;
320                 }
321                 *lenp = len;
322                 *ppos += len;
323         }
324         return 0;
325 }
326
327 static void warn_sysctl_write(struct ctl_table *table)
328 {
329         pr_warn_once("%s wrote to %s when file position was not 0!\n"
330                 "This will not be supported in the future. To silence this\n"
331                 "warning, set kernel.sysctl_writes_strict = -1\n",
332                 current->comm, table->procname);
333 }
334
335 /**
336  * proc_first_pos_non_zero_ignore - check if first position is allowed
337  * @ppos: file position
338  * @table: the sysctl table
339  *
340  * Returns true if the first position is non-zero and the sysctl_writes_strict
341  * mode indicates this is not allowed for numeric input types. String proc
342  * handlers can ignore the return value.
343  */
344 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
345                                            struct ctl_table *table)
346 {
347         if (!*ppos)
348                 return false;
349
350         switch (sysctl_writes_strict) {
351         case SYSCTL_WRITES_STRICT:
352                 return true;
353         case SYSCTL_WRITES_WARN:
354                 warn_sysctl_write(table);
355                 return false;
356         default:
357                 return false;
358         }
359 }
360
361 /**
362  * proc_dostring - read a string sysctl
363  * @table: the sysctl table
364  * @write: %TRUE if this is a write to the sysctl file
365  * @buffer: the user buffer
366  * @lenp: the size of the user buffer
367  * @ppos: file position
368  *
369  * Reads/writes a string from/to the user buffer. If the kernel
370  * buffer provided is not large enough to hold the string, the
371  * string is truncated. The copied string is %NULL-terminated.
372  * If the string is being read by the user process, it is copied
373  * and a newline '\n' is added. It is truncated if the buffer is
374  * not large enough.
375  *
376  * Returns 0 on success.
377  */
378 int proc_dostring(struct ctl_table *table, int write,
379                   void *buffer, size_t *lenp, loff_t *ppos)
380 {
381         if (write)
382                 proc_first_pos_non_zero_ignore(ppos, table);
383
384         return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
385                         ppos);
386 }
387
/*
 * Advance *@buf to the position returned by skip_spaces() and return the
 * number of characters stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
396
/*
 * Step over a run of the character @v at the front of *@buf, looking at no
 * more than *@size characters. *@buf is advanced and *@size reduced by the
 * number of characters skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
406
407 /**
408  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
409  *                   fail on overflow
410  *
411  * @cp: kernel buffer containing the string to parse
412  * @endp: pointer to store the trailing characters
413  * @base: the base to use
414  * @res: where the parsed integer will be stored
415  *
416  * In case of success 0 is returned and @res will contain the parsed integer,
417  * @endp will hold any trailing characters.
418  * This function will fail the parse on overflow. If there wasn't an overflow
419  * the function will defer the decision what characters count as invalid to the
420  * caller.
421  */
422 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
423                            unsigned long *res)
424 {
425         unsigned long long result;
426         unsigned int rv;
427
428         cp = _parse_integer_fixup_radix(cp, &base);
429         rv = _parse_integer(cp, base, &result);
430         if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
431                 return -ERANGE;
432
433         cp += rv;
434
435         if (endp)
436                 *endp = (char *)cp;
437
438         *res = (unsigned long)result;
439         return 0;
440 }
441
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: where the magnitude of the number is stored
 * @neg: set to %TRUE if the number is negative
 * @perm_tr: the characters that are allowed to follow the number
 * @perm_tr_len: number of entries in @perm_tr
 * @tr: if non-NULL, receives the character following the number
 *
 * Returns %0 on success, with @buf advanced and @size reduced by the
 * number of bytes consumed. @tr is only written when input remains after
 * the number. Returns -EINVAL for empty input, a missing digit, overflow,
 * a number filling the whole scratch buffer, or a trailer not listed in
 * @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char tmp[TMPBUFLEN];
	char *cursor = tmp;
	bool more_input;
	int len;

	if (!*size)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated copy of the input. */
	len = *size;
	if (len >= TMPBUFLEN)
		len = TMPBUFLEN - 1;
	memcpy(tmp, *buf, len);
	tmp[len] = 0;

	/* A lone '-' is not treated as a sign. */
	*neg = (*cursor == '-' && *size > 1);
	if (*neg)
		cursor++;

	if (!isdigit(*cursor))
		return -EINVAL;

	if (strtoul_lenient(cursor, &cursor, 0, val))
		return -EINVAL;

	len = cursor - tmp;

	/*
	 * The copy was used up entirely, so the character after the number
	 * is unknown: it could make the integer invalid (e.g. 1234...a) or
	 * start a second one (e.g. 123...1). Refuse such large numbers.
	 */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	more_input = len < *size;

	if (more_input && perm_tr_len && !memchr(perm_tr, *cursor, perm_tr_len))
		return -EINVAL;

	if (tr && more_input)
		*tr = *cursor;

	*buf += len;
	*size -= len;

	return 0;
}
507
508 /**
509  * proc_put_long - converts an integer to a decimal ASCII formatted string
510  *
511  * @buf: the user buffer
512  * @size: the size of the user buffer
513  * @val: the integer to be converted
514  * @neg: sign of the number, %TRUE for negative
515  *
516  * In case of success @buf and @size are updated with the amount of bytes
517  * written.
518  */
519 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
520 {
521         int len;
522         char tmp[TMPBUFLEN], *p = tmp;
523
524         sprintf(p, "%s%lu", neg ? "-" : "", val);
525         len = strlen(tmp);
526         if (len > *size)
527                 len = *size;
528         memcpy(*buf, tmp, len);
529         *size -= len;
530         *buf += len;
531 }
532 #undef TMPBUFLEN
533
/*
 * Store @c at *@buf if at least one byte of space is left, then advance
 * *@buf and decrement *@size. Does nothing when *@size is zero.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;
	*buf = dst + 1;
	(*size)--;
}
545
/*
 * Conversion callback for bool sysctls. @valp is accessed as a bool.
 *
 * Write: the bool is set from *@lvalp (any non-zero value becomes true);
 *        *@negp is not consulted.
 * Read:  *@lvalp receives 0 or 1 and *@negp is cleared.
 *
 * Always returns 0. @data is unused.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
				int *valp,
				int write, void *data)
{
	bool *flag = (bool *)valp;

	if (!write) {
		*lvalp = *flag;
		*negp = false;
		return 0;
	}

	*flag = *lvalp;
	return 0;
}
560
/*
 * Default conversion between an int and its sign/magnitude representation
 * (*@negp, *@lvalp).
 *
 * Write: stores the value into *@valp with WRITE_ONCE(). Returns -EINVAL
 *        when the magnitude does not fit into an int (up to INT_MAX for
 *        positive values, INT_MAX + 1 for negative ones).
 * Read:  loads *@valp with READ_ONCE() and splits it into sign and
 *        magnitude.
 *
 * Returns 0 on success. @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long magnitude, limit;
	bool negative;
	int val;

	if (!write) {
		val = READ_ONCE(*valp);
		if (val < 0) {
			*negp = true;
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
		return 0;
	}

	negative = *negp;
	magnitude = *lvalp;

	/* The negative range reaches one further than the positive one. */
	limit = (unsigned long) INT_MAX + (negative ? 1 : 0);
	if (magnitude > limit)
		return -EINVAL;

	if (negative)
		WRITE_ONCE(*valp, -magnitude);
	else
		WRITE_ONCE(*valp, magnitude);

	return 0;
}
587
/*
 * Default conversion between an unsigned int and an unsigned long.
 *
 * Write: stores *@lvalp into *@valp with WRITE_ONCE(); values above
 *        UINT_MAX are rejected with -EINVAL.
 * Read:  loads *@valp with READ_ONCE() into *@lvalp.
 *
 * Returns 0 on success. @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		unsigned int cur = READ_ONCE(*valp);

		*lvalp = (unsigned long)cur;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
602
603 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
604
605 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
606                   int write, void *buffer,
607                   size_t *lenp, loff_t *ppos,
608                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
609                               int write, void *data),
610                   void *data)
611 {
612         int *i, vleft, first = 1, err = 0;
613         size_t left;
614         char *p;
615         
616         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
617                 *lenp = 0;
618                 return 0;
619         }
620         
621         i = (int *) tbl_data;
622         vleft = table->maxlen / sizeof(*i);
623         left = *lenp;
624
625         if (!conv)
626                 conv = do_proc_dointvec_conv;
627
628         if (write) {
629                 if (proc_first_pos_non_zero_ignore(ppos, table))
630                         goto out;
631
632                 if (left > PAGE_SIZE - 1)
633                         left = PAGE_SIZE - 1;
634                 p = buffer;
635         }
636
637         for (; left && vleft--; i++, first=0) {
638                 unsigned long lval;
639                 bool neg;
640
641                 if (write) {
642                         left -= proc_skip_spaces(&p);
643
644                         if (!left)
645                                 break;
646                         err = proc_get_long(&p, &left, &lval, &neg,
647                                              proc_wspace_sep,
648                                              sizeof(proc_wspace_sep), NULL);
649                         if (err)
650                                 break;
651                         if (conv(&neg, &lval, i, 1, data)) {
652                                 err = -EINVAL;
653                                 break;
654                         }
655                 } else {
656                         if (conv(&neg, &lval, i, 0, data)) {
657                                 err = -EINVAL;
658                                 break;
659                         }
660                         if (!first)
661                                 proc_put_char(&buffer, &left, '\t');
662                         proc_put_long(&buffer, &left, lval, neg);
663                 }
664         }
665
666         if (!write && !first && left && !err)
667                 proc_put_char(&buffer, &left, '\n');
668         if (write && !err && left)
669                 left -= proc_skip_spaces(&p);
670         if (write && first)
671                 return err ? : -EINVAL;
672         *lenp -= left;
673 out:
674         *ppos += *lenp;
675         return err;
676 }
677
678 static int do_proc_dointvec(struct ctl_table *table, int write,
679                   void *buffer, size_t *lenp, loff_t *ppos,
680                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
681                               int write, void *data),
682                   void *data)
683 {
684         return __do_proc_dointvec(table->data, table, write,
685                         buffer, lenp, ppos, conv, data);
686 }
687
688 static int do_proc_douintvec_w(unsigned int *tbl_data,
689                                struct ctl_table *table,
690                                void *buffer,
691                                size_t *lenp, loff_t *ppos,
692                                int (*conv)(unsigned long *lvalp,
693                                            unsigned int *valp,
694                                            int write, void *data),
695                                void *data)
696 {
697         unsigned long lval;
698         int err = 0;
699         size_t left;
700         bool neg;
701         char *p = buffer;
702
703         left = *lenp;
704
705         if (proc_first_pos_non_zero_ignore(ppos, table))
706                 goto bail_early;
707
708         if (left > PAGE_SIZE - 1)
709                 left = PAGE_SIZE - 1;
710
711         left -= proc_skip_spaces(&p);
712         if (!left) {
713                 err = -EINVAL;
714                 goto out_free;
715         }
716
717         err = proc_get_long(&p, &left, &lval, &neg,
718                              proc_wspace_sep,
719                              sizeof(proc_wspace_sep), NULL);
720         if (err || neg) {
721                 err = -EINVAL;
722                 goto out_free;
723         }
724
725         if (conv(&lval, tbl_data, 1, data)) {
726                 err = -EINVAL;
727                 goto out_free;
728         }
729
730         if (!err && left)
731                 left -= proc_skip_spaces(&p);
732
733 out_free:
734         if (err)
735                 return -EINVAL;
736
737         return 0;
738
739         /* This is in keeping with old __do_proc_dointvec() */
740 bail_early:
741         *ppos += *lenp;
742         return err;
743 }
744
745 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
746                                size_t *lenp, loff_t *ppos,
747                                int (*conv)(unsigned long *lvalp,
748                                            unsigned int *valp,
749                                            int write, void *data),
750                                void *data)
751 {
752         unsigned long lval;
753         int err = 0;
754         size_t left;
755
756         left = *lenp;
757
758         if (conv(&lval, tbl_data, 0, data)) {
759                 err = -EINVAL;
760                 goto out;
761         }
762
763         proc_put_long(&buffer, &left, lval, false);
764         if (!left)
765                 goto out;
766
767         proc_put_char(&buffer, &left, '\n');
768
769 out:
770         *lenp -= left;
771         *ppos += *lenp;
772
773         return err;
774 }
775
776 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
777                                int write, void *buffer,
778                                size_t *lenp, loff_t *ppos,
779                                int (*conv)(unsigned long *lvalp,
780                                            unsigned int *valp,
781                                            int write, void *data),
782                                void *data)
783 {
784         unsigned int *i, vleft;
785
786         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
787                 *lenp = 0;
788                 return 0;
789         }
790
791         i = (unsigned int *) tbl_data;
792         vleft = table->maxlen / sizeof(*i);
793
794         /*
795          * Arrays are not supported, keep this simple. *Do not* add
796          * support for them.
797          */
798         if (vleft != 1) {
799                 *lenp = 0;
800                 return -EINVAL;
801         }
802
803         if (!conv)
804                 conv = do_proc_douintvec_conv;
805
806         if (write)
807                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
808                                            conv, data);
809         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
810 }
811
812 static int do_proc_douintvec(struct ctl_table *table, int write,
813                              void *buffer, size_t *lenp, loff_t *ppos,
814                              int (*conv)(unsigned long *lvalp,
815                                          unsigned int *valp,
816                                          int write, void *data),
817                              void *data)
818 {
819         return __do_proc_douintvec(table->data, table, write,
820                                    buffer, lenp, ppos, conv, data);
821 }
822
823 /**
824  * proc_dobool - read/write a bool
825  * @table: the sysctl table
826  * @write: %TRUE if this is a write to the sysctl file
827  * @buffer: the user buffer
828  * @lenp: the size of the user buffer
829  * @ppos: file position
830  *
831  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
832  * values from/to the user buffer, treated as an ASCII string.
833  *
834  * Returns 0 on success.
835  */
836 int proc_dobool(struct ctl_table *table, int write, void *buffer,
837                 size_t *lenp, loff_t *ppos)
838 {
839         return do_proc_dointvec(table, write, buffer, lenp, ppos,
840                                 do_proc_dobool_conv, NULL);
841 }
842
843 /**
844  * proc_dointvec - read a vector of integers
845  * @table: the sysctl table
846  * @write: %TRUE if this is a write to the sysctl file
847  * @buffer: the user buffer
848  * @lenp: the size of the user buffer
849  * @ppos: file position
850  *
851  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
852  * values from/to the user buffer, treated as an ASCII string. 
853  *
854  * Returns 0 on success.
855  */
856 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
857                   size_t *lenp, loff_t *ppos)
858 {
859         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
860 }
861
#ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() plus a one-time warning: when CONFIG_PREEMPT_RT
 * is enabled and a successful write changes the int in table->data, the
 * entry name and the writing task are logged. In every other case this
 * behaves exactly like proc_dointvec_minmax().
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	const bool watch = IS_ENABLED(CONFIG_PREEMPT_RT) && write;
	int *valp = table->data;
	int before = 0;
	int ret;

	if (watch)
		before = *valp;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (watch && !ret && before != *valp)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));
	return ret;
}
#endif
882
883 /**
884  * proc_douintvec - read a vector of unsigned integers
885  * @table: the sysctl table
886  * @write: %TRUE if this is a write to the sysctl file
887  * @buffer: the user buffer
888  * @lenp: the size of the user buffer
889  * @ppos: file position
890  *
891  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
892  * values from/to the user buffer, treated as an ASCII string.
893  *
894  * Returns 0 on success.
895  */
896 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
897                 size_t *lenp, loff_t *ppos)
898 {
899         return do_proc_douintvec(table, write, buffer, lenp, ppos,
900                                  do_proc_douintvec_conv, NULL);
901 }
902
903 /*
904  * Taint values can only be increased
905  * This means we can safely use a temporary.
906  */
907 static int proc_taint(struct ctl_table *table, int write,
908                                void *buffer, size_t *lenp, loff_t *ppos)
909 {
910         struct ctl_table t;
911         unsigned long tmptaint = get_taint();
912         int err;
913
914         if (write && !capable(CAP_SYS_ADMIN))
915                 return -EPERM;
916
917         t = *table;
918         t.data = &tmptaint;
919         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
920         if (err < 0)
921                 return err;
922
923         if (write) {
924                 int i;
925
926                 /*
927                  * If we are relying on panic_on_taint not producing
928                  * false positives due to userspace input, bail out
929                  * before setting the requested taint flags.
930                  */
931                 if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
932                         return -EINVAL;
933
934                 /*
935                  * Poor man's atomic or. Not worth adding a primitive
936                  * to everyone's atomic.h for this
937                  */
938                 for (i = 0; i < TAINT_FLAGS_COUNT; i++)
939                         if ((1UL << i) & tmptaint)
940                                 add_taint(i, LOCKDEP_STILL_OK);
941         }
942
943         return err;
944 }
945
946 #ifdef CONFIG_PRINTK
947 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
948                                 void *buffer, size_t *lenp, loff_t *ppos)
949 {
950         if (write && !capable(CAP_SYS_ADMIN))
951                 return -EPERM;
952
953         return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
954 }
955 #endif
956
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler. The limits
 * are dereferenced each time a written value is checked.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};
970
971 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
972                                         int *valp,
973                                         int write, void *data)
974 {
975         int tmp, ret;
976         struct do_proc_dointvec_minmax_conv_param *param = data;
977         /*
978          * If writing, first do so via a temporary local int so we can
979          * bounds-check it before touching *valp.
980          */
981         int *ip = write ? &tmp : valp;
982
983         ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
984         if (ret)
985                 return ret;
986
987         if (write) {
988                 if ((param->min && *param->min > tmp) ||
989                     (param->max && *param->max < tmp))
990                         return -EINVAL;
991                 WRITE_ONCE(*valp, tmp);
992         }
993
994         return 0;
995 }
996
997 /**
998  * proc_dointvec_minmax - read a vector of integers with min/max values
999  * @table: the sysctl table
1000  * @write: %TRUE if this is a write to the sysctl file
1001  * @buffer: the user buffer
1002  * @lenp: the size of the user buffer
1003  * @ppos: file position
1004  *
1005  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1006  * values from/to the user buffer, treated as an ASCII string.
1007  *
1008  * This routine will ensure the values are within the range specified by
1009  * table->extra1 (min) and table->extra2 (max).
1010  *
1011  * Returns 0 on success or -EINVAL on write when the range check fails.
1012  */
1013 int proc_dointvec_minmax(struct ctl_table *table, int write,
1014                   void *buffer, size_t *lenp, loff_t *ppos)
1015 {
1016         struct do_proc_dointvec_minmax_conv_param param = {
1017                 .min = (int *) table->extra1,
1018                 .max = (int *) table->extra2,
1019         };
1020         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1021                                 do_proc_dointvec_minmax_conv, &param);
1022 }
1023
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler. The limits
 * are dereferenced each time a written value is checked.
 */
struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min;
        unsigned int *max;
};
1037
1038 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
1039                                          unsigned int *valp,
1040                                          int write, void *data)
1041 {
1042         int ret;
1043         unsigned int tmp;
1044         struct do_proc_douintvec_minmax_conv_param *param = data;
1045         /* write via temporary local uint for bounds-checking */
1046         unsigned int *up = write ? &tmp : valp;
1047
1048         ret = do_proc_douintvec_conv(lvalp, up, write, data);
1049         if (ret)
1050                 return ret;
1051
1052         if (write) {
1053                 if ((param->min && *param->min > tmp) ||
1054                     (param->max && *param->max < tmp))
1055                         return -ERANGE;
1056
1057                 WRITE_ONCE(*valp, tmp);
1058         }
1059
1060         return 0;
1061 }
1062
1063 /**
1064  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
1065  * @table: the sysctl table
1066  * @write: %TRUE if this is a write to the sysctl file
1067  * @buffer: the user buffer
1068  * @lenp: the size of the user buffer
1069  * @ppos: file position
1070  *
1071  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
1072  * values from/to the user buffer, treated as an ASCII string. Negative
1073  * strings are not allowed.
1074  *
1075  * This routine will ensure the values are within the range specified by
1076  * table->extra1 (min) and table->extra2 (max). There is a final sanity
1077  * check for UINT_MAX to avoid having to support wrap around uses from
1078  * userspace.
1079  *
1080  * Returns 0 on success or -ERANGE on write when the range check fails.
1081  */
1082 int proc_douintvec_minmax(struct ctl_table *table, int write,
1083                           void *buffer, size_t *lenp, loff_t *ppos)
1084 {
1085         struct do_proc_douintvec_minmax_conv_param param = {
1086                 .min = (unsigned int *) table->extra1,
1087                 .max = (unsigned int *) table->extra2,
1088         };
1089         return do_proc_douintvec(table, write, buffer, lenp, ppos,
1090                                  do_proc_douintvec_minmax_conv, &param);
1091 }
1092
1093 /**
1094  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
1095  * @table: the sysctl table
1096  * @write: %TRUE if this is a write to the sysctl file
1097  * @buffer: the user buffer
1098  * @lenp: the size of the user buffer
1099  * @ppos: file position
1100  *
1101  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
1102  * values from/to the user buffer, treated as an ASCII string. Negative
1103  * strings are not allowed.
1104  *
1105  * This routine will ensure the values are within the range specified by
1106  * table->extra1 (min) and table->extra2 (max).
1107  *
1108  * Returns 0 on success or an error on write when the range check fails.
1109  */
1110 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1111                         void *buffer, size_t *lenp, loff_t *ppos)
1112 {
1113         struct ctl_table tmp;
1114         unsigned int min = 0, max = 255U, val;
1115         u8 *data = table->data;
1116         struct do_proc_douintvec_minmax_conv_param param = {
1117                 .min = &min,
1118                 .max = &max,
1119         };
1120         int res;
1121
1122         /* Do not support arrays yet. */
1123         if (table->maxlen != sizeof(u8))
1124                 return -EINVAL;
1125
1126         if (table->extra1) {
1127                 min = *(unsigned int *) table->extra1;
1128                 if (min > 255U)
1129                         return -EINVAL;
1130         }
1131         if (table->extra2) {
1132                 max = *(unsigned int *) table->extra2;
1133                 if (max > 255U)
1134                         return -EINVAL;
1135         }
1136
1137         tmp = *table;
1138
1139         tmp.maxlen = sizeof(val);
1140         tmp.data = &val;
1141         val = READ_ONCE(*data);
1142         res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1143                                 do_proc_douintvec_minmax_conv, &param);
1144         if (res)
1145                 return res;
1146         if (write)
1147                 WRITE_ONCE(*data, val);
1148         return 0;
1149 }
1150 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1151
1152 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
1153                                         unsigned int *valp,
1154                                         int write, void *data)
1155 {
1156         if (write) {
1157                 unsigned int val;
1158
1159                 val = round_pipe_size(*lvalp);
1160                 if (val == 0)
1161                         return -EINVAL;
1162
1163                 *valp = val;
1164         } else {
1165                 unsigned int val = *valp;
1166                 *lvalp = (unsigned long) val;
1167         }
1168
1169         return 0;
1170 }
1171
1172 static int proc_dopipe_max_size(struct ctl_table *table, int write,
1173                                 void *buffer, size_t *lenp, loff_t *ppos)
1174 {
1175         return do_proc_douintvec(table, write, buffer, lenp, ppos,
1176                                  do_proc_dopipe_max_size_conv, NULL);
1177 }
1178
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT but core_pattern is neither
 * an absolute path ('/') nor a pipe handler ('|').  Compiles to nothing
 * without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
        );
#endif
}
1192
1193 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
1194                 void *buffer, size_t *lenp, loff_t *ppos)
1195 {
1196         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1197         if (!error)
1198                 validate_coredump_safety();
1199         return error;
1200 }
1201
1202 #ifdef CONFIG_COREDUMP
1203 static int proc_dostring_coredump(struct ctl_table *table, int write,
1204                   void *buffer, size_t *lenp, loff_t *ppos)
1205 {
1206         int error = proc_dostring(table, write, buffer, lenp, ppos);
1207         if (!error)
1208                 validate_coredump_safety();
1209         return error;
1210 }
1211 #endif
1212
1213 #ifdef CONFIG_MAGIC_SYSRQ
1214 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
1215                                 void *buffer, size_t *lenp, loff_t *ppos)
1216 {
1217         int tmp, ret;
1218
1219         tmp = sysrq_mask();
1220
1221         ret = __do_proc_dointvec(&tmp, table, write, buffer,
1222                                lenp, ppos, NULL, NULL);
1223         if (ret || !write)
1224                 return ret;
1225
1226         if (write)
1227                 sysrq_toggle_support(tmp);
1228
1229         return 0;
1230 }
1231 #endif
1232
/*
 * Common worker for the unsigned long vector handlers.
 *
 * @data is the array to operate on (up to table->maxlen/sizeof(unsigned
 * long) entries); table->extra1 and table->extra2, when non-NULL, point to
 * the minimum and maximum accepted values.
 *
 * On write each parsed number is stored as convmul * val / convdiv and
 * must lie within the limits, otherwise -EINVAL is returned.  On read
 * each entry is reported as convdiv * entry / convmul, tab separated and
 * terminated by a newline.
 *
 * Returns 0 with *lenp set to 0 when there is nothing to do (no data,
 * zero maxlen, zero-length buffer, or a read at a non-zero position).
 * A write that does not get past its first value returns the parse
 * error, or -EINVAL if there was none.  Otherwise *lenp is reduced to
 * the amount consumed/produced and *ppos is advanced by it.
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
                int write, void *buffer, size_t *lenp, loff_t *ppos,
                unsigned long convmul, unsigned long convdiv)
{
        unsigned long *i, *min, *max;
        int vleft, first = 1, err = 0;
        size_t left;
        char *p;

        if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
                *lenp = 0;
                return 0;
        }

        i = (unsigned long *) data;
        min = (unsigned long *) table->extra1;
        max = (unsigned long *) table->extra2;
        vleft = table->maxlen / sizeof(unsigned long);
        left = *lenp;

        if (write) {
                if (proc_first_pos_non_zero_ignore(ppos, table))
                        goto out;

                /* Parse at most PAGE_SIZE - 1 bytes of input. */
                if (left > PAGE_SIZE - 1)
                        left = PAGE_SIZE - 1;
                p = buffer;
        }

        /* One iteration per array slot; i and first advance even on continue. */
        for (; left && vleft--; i++, first = 0) {
                unsigned long val;

                if (write) {
                        bool neg;

                        left -= proc_skip_spaces(&p);
                        if (!left)
                                break;

                        err = proc_get_long(&p, &left, &val, &neg,
                                             proc_wspace_sep,
                                             sizeof(proc_wspace_sep), NULL);
                        if (err)
                                break;
                        /* A negative number is skipped: its slot keeps its old value. */
                        if (neg)
                                continue;
                        val = convmul * val / convdiv;
                        if ((min && val < *min) || (max && val > *max)) {
                                err = -EINVAL;
                                break;
                        }
                        WRITE_ONCE(*i, val);
                } else {
                        val = convdiv * READ_ONCE(*i) / convmul;
                        if (!first)
                                proc_put_char(&buffer, &left, '\t');
                        proc_put_long(&buffer, &left, val, false);
                }
        }

        if (!write && !first && left && !err)
                proc_put_char(&buffer, &left, '\n');
        if (write && !err)
                left -= proc_skip_spaces(&p);
        /* Nothing got past the first value: report the error, or -EINVAL. */
        if (write && first)
                return err ? : -EINVAL;
        *lenp -= left;
out:
        *ppos += *lenp;
        return err;
}
1304
1305 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1306                 void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1307                 unsigned long convdiv)
1308 {
1309         return __do_proc_doulongvec_minmax(table->data, table, write,
1310                         buffer, lenp, ppos, convmul, convdiv);
1311 }
1312
1313 /**
1314  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1315  * @table: the sysctl table
1316  * @write: %TRUE if this is a write to the sysctl file
1317  * @buffer: the user buffer
1318  * @lenp: the size of the user buffer
1319  * @ppos: file position
1320  *
1321  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1322  * values from/to the user buffer, treated as an ASCII string.
1323  *
1324  * This routine will ensure the values are within the range specified by
1325  * table->extra1 (min) and table->extra2 (max).
1326  *
1327  * Returns 0 on success.
1328  */
1329 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1330                            void *buffer, size_t *lenp, loff_t *ppos)
1331 {
1332     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1333 }
1334
1335 /**
1336  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1337  * @table: the sysctl table
1338  * @write: %TRUE if this is a write to the sysctl file
1339  * @buffer: the user buffer
1340  * @lenp: the size of the user buffer
1341  * @ppos: file position
1342  *
1343  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1344  * values from/to the user buffer, treated as an ASCII string. The values
1345  * are treated as milliseconds, and converted to jiffies when they are stored.
1346  *
1347  * This routine will ensure the values are within the range specified by
1348  * table->extra1 (min) and table->extra2 (max).
1349  *
1350  * Returns 0 on success.
1351  */
1352 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1353                                       void *buffer, size_t *lenp, loff_t *ppos)
1354 {
1355     return do_proc_doulongvec_minmax(table, write, buffer,
1356                                      lenp, ppos, HZ, 1000l);
1357 }
1358
1359
1360 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1361                                          int *valp,
1362                                          int write, void *data)
1363 {
1364         if (write) {
1365                 if (*lvalp > INT_MAX / HZ)
1366                         return 1;
1367                 if (*negp)
1368                         WRITE_ONCE(*valp, -*lvalp * HZ);
1369                 else
1370                         WRITE_ONCE(*valp, *lvalp * HZ);
1371         } else {
1372                 int val = READ_ONCE(*valp);
1373                 unsigned long lval;
1374                 if (val < 0) {
1375                         *negp = true;
1376                         lval = -(unsigned long)val;
1377                 } else {
1378                         *negp = false;
1379                         lval = (unsigned long)val;
1380                 }
1381                 *lvalp = lval / HZ;
1382         }
1383         return 0;
1384 }
1385
1386 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1387                                                 int *valp,
1388                                                 int write, void *data)
1389 {
1390         if (write) {
1391                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1392                         return 1;
1393                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1394         } else {
1395                 int val = *valp;
1396                 unsigned long lval;
1397                 if (val < 0) {
1398                         *negp = true;
1399                         lval = -(unsigned long)val;
1400                 } else {
1401                         *negp = false;
1402                         lval = (unsigned long)val;
1403                 }
1404                 *lvalp = jiffies_to_clock_t(lval);
1405         }
1406         return 0;
1407 }
1408
1409 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
1410                                             int *valp,
1411                                             int write, void *data)
1412 {
1413         if (write) {
1414                 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
1415
1416                 if (jif > INT_MAX)
1417                         return 1;
1418                 *valp = (int)jif;
1419         } else {
1420                 int val = *valp;
1421                 unsigned long lval;
1422                 if (val < 0) {
1423                         *negp = true;
1424                         lval = -(unsigned long)val;
1425                 } else {
1426                         *negp = false;
1427                         lval = (unsigned long)val;
1428                 }
1429                 *lvalp = jiffies_to_msecs(lval);
1430         }
1431         return 0;
1432 }
1433
1434 /**
1435  * proc_dointvec_jiffies - read a vector of integers as seconds
1436  * @table: the sysctl table
1437  * @write: %TRUE if this is a write to the sysctl file
1438  * @buffer: the user buffer
1439  * @lenp: the size of the user buffer
1440  * @ppos: file position
1441  *
1442  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1443  * values from/to the user buffer, treated as an ASCII string. 
1444  * The values read are assumed to be in seconds, and are converted into
1445  * jiffies.
1446  *
1447  * Returns 0 on success.
1448  */
1449 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1450                           void *buffer, size_t *lenp, loff_t *ppos)
1451 {
1452     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1453                             do_proc_dointvec_jiffies_conv,NULL);
1454 }
1455
1456 /**
1457  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1458  * @table: the sysctl table
1459  * @write: %TRUE if this is a write to the sysctl file
1460  * @buffer: the user buffer
1461  * @lenp: the size of the user buffer
1462  * @ppos: pointer to the file position
1463  *
1464  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1465  * values from/to the user buffer, treated as an ASCII string. 
1466  * The values read are assumed to be in 1/USER_HZ seconds, and 
1467  * are converted into jiffies.
1468  *
1469  * Returns 0 on success.
1470  */
1471 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1472                                  void *buffer, size_t *lenp, loff_t *ppos)
1473 {
1474     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1475                             do_proc_dointvec_userhz_jiffies_conv,NULL);
1476 }
1477
1478 /**
1479  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1480  * @table: the sysctl table
1481  * @write: %TRUE if this is a write to the sysctl file
1482  * @buffer: the user buffer
1483  * @lenp: the size of the user buffer
1484  * @ppos: file position
1485  * @ppos: the current position in the file
1486  *
1487  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1488  * values from/to the user buffer, treated as an ASCII string. 
1489  * The values read are assumed to be in 1/1000 seconds, and 
1490  * are converted into jiffies.
1491  *
1492  * Returns 0 on success.
1493  */
1494 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1495                 size_t *lenp, loff_t *ppos)
1496 {
1497         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1498                                 do_proc_dointvec_ms_jiffies_conv, NULL);
1499 }
1500
1501 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1502                 size_t *lenp, loff_t *ppos)
1503 {
1504         struct pid *new_pid;
1505         pid_t tmp;
1506         int r;
1507
1508         tmp = pid_vnr(cad_pid);
1509
1510         r = __do_proc_dointvec(&tmp, table, write, buffer,
1511                                lenp, ppos, NULL, NULL);
1512         if (r || !write)
1513                 return r;
1514
1515         new_pid = find_get_pid(tmp);
1516         if (!new_pid)
1517                 return -ESRCH;
1518
1519         put_pid(xchg(&cad_pid, new_pid));
1520         return 0;
1521 }
1522
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * table->data holds a pointer to the bitmap pointer, and table->maxlen
 * holds the bitmap length (in bits).
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. A write is
 * parsed into a temporary bitmap first; on success it replaces the
 * bitmap when the file position is zero and is OR-ed into it otherwise.
 * Bit numbers that are negative or not below the bitmap length, and
 * ranges whose start is above their end, fail with -EINVAL.
 *
 * On error *lenp and *ppos are left unchanged.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, or the parse error.
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
                         void *buffer, size_t *lenp, loff_t *ppos)
{
        int err = 0;
        size_t left = *lenp;
        unsigned long bitmap_len = table->maxlen;
        unsigned long *bitmap = *(unsigned long **) table->data;
        unsigned long *tmp_bitmap = NULL;
        /* Terminators accepted after the first and second number of a range. */
        char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

        if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
                *lenp = 0;
                return 0;
        }

        if (write) {
                char *p = buffer;
                size_t skipped = 0;

                if (left > PAGE_SIZE - 1) {
                        left = PAGE_SIZE - 1;
                        /* How much of the buffer we'll skip this pass */
                        skipped = *lenp - left;
                }

                tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
                if (!tmp_bitmap)
                        return -ENOMEM;
                proc_skip_char(&p, &left, '\n');
                while (!err && left) {
                        unsigned long val_a, val_b;
                        bool neg;
                        size_t saved_left;

                        /* In case we stop parsing mid-number, we can reset */
                        saved_left = left;
                        err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
                                             sizeof(tr_a), &c);
                        /*
                         * If we consumed the entirety of a truncated buffer or
                         * only one char is left (may be a "-"), then stop here,
                         * reset, & come back for more.
                         */
                        if ((left <= 1) && skipped) {
                                left = saved_left;
                                break;
                        }

                        if (err)
                                break;
                        if (val_a >= bitmap_len || neg) {
                                err = -EINVAL;
                                break;
                        }

                        /* A lone number is the range val_a-val_a. */
                        val_b = val_a;
                        /* Step over the terminator that ended the number. */
                        if (left) {
                                p++;
                                left--;
                        }

                        if (c == '-') {
                                err = proc_get_long(&p, &left, &val_b,
                                                     &neg, tr_b, sizeof(tr_b),
                                                     &c);
                                /*
                                 * If we consumed all of a truncated buffer,
                                 * then stop here, reset, & come back for more.
                                 */
                                if (!left && skipped) {
                                        left = saved_left;
                                        break;
                                }

                                if (err)
                                        break;
                                if (val_b >= bitmap_len || neg ||
                                    val_a > val_b) {
                                        err = -EINVAL;
                                        break;
                                }
                                if (left) {
                                        p++;
                                        left--;
                                }
                        }

                        bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
                        proc_skip_char(&p, &left, '\n');
                }
                /* Count the part beyond this pass as not consumed. */
                left += skipped;
        } else {
                unsigned long bit_a, bit_b = 0;
                bool first = 1;

                /* Emit each run of set bits as "a" or "a-b", comma separated. */
                while (left) {
                        bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
                        if (bit_a >= bitmap_len)
                                break;
                        bit_b = find_next_zero_bit(bitmap, bitmap_len,
                                                   bit_a + 1) - 1;

                        if (!first)
                                proc_put_char(&buffer, &left, ',');
                        proc_put_long(&buffer, &left, bit_a, false);
                        if (bit_a != bit_b) {
                                proc_put_char(&buffer, &left, '-');
                                proc_put_long(&buffer, &left, bit_b, false);
                        }

                        first = 0; bit_b++;
                }
                proc_put_char(&buffer, &left, '\n');
        }

        if (!err) {
                if (write) {
                        /* Non-zero position: merge; otherwise replace. */
                        if (*ppos)
                                bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
                        else
                                bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
                }
                *lenp -= left;
                *ppos += *lenp;
        }

        /* tmp_bitmap is still NULL here on the read path. */
        bitmap_free(tmp_bitmap);
        return err;
}
1669
1670 #else /* CONFIG_PROC_SYSCTL */
1671
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write,
                  void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1677
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dobool(struct ctl_table *table, int write,
                void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1683
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write,
                  void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1689
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec(struct ctl_table *table, int write,
                  void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1695
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
                    void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1701
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec_minmax(struct ctl_table *table, int write,
                          void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1707
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dou8vec_minmax(struct ctl_table *table, int write,
                        void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1713
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
                    void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1719
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
                    void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1725
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
                             void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1731
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
                    void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1737
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
                                      void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1743
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_do_large_bitmap(struct ctl_table *table, int write,
                         void *buffer, size_t *lenp, loff_t *ppos)
{
        return -ENOSYS;
}
1749
1750 #endif /* CONFIG_PROC_SYSCTL */
1751
1752 #if defined(CONFIG_SYSCTL)
1753 int proc_do_static_key(struct ctl_table *table, int write,
1754                        void *buffer, size_t *lenp, loff_t *ppos)
1755 {
1756         struct static_key *key = (struct static_key *)table->data;
1757         static DEFINE_MUTEX(static_key_mutex);
1758         int val, ret;
1759         struct ctl_table tmp = {
1760                 .data   = &val,
1761                 .maxlen = sizeof(val),
1762                 .mode   = table->mode,
1763                 .extra1 = SYSCTL_ZERO,
1764                 .extra2 = SYSCTL_ONE,
1765         };
1766
1767         if (write && !capable(CAP_SYS_ADMIN))
1768                 return -EPERM;
1769
1770         mutex_lock(&static_key_mutex);
1771         val = static_key_enabled(key);
1772         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1773         if (write && !ret) {
1774                 if (val)
1775                         static_key_enable(key);
1776                 else
1777                         static_key_disable(key);
1778         }
1779         mutex_unlock(&static_key_mutex);
1780         return ret;
1781 }
1782
1783 static struct ctl_table kern_table[] = {
1784         {
1785                 .procname       = "sched_child_runs_first",
1786                 .data           = &sysctl_sched_child_runs_first,
1787                 .maxlen         = sizeof(unsigned int),
1788                 .mode           = 0644,
1789                 .proc_handler   = proc_dointvec,
1790         },
1791 #ifdef CONFIG_SCHEDSTATS
1792         {
1793                 .procname       = "sched_schedstats",
1794                 .data           = NULL,
1795                 .maxlen         = sizeof(unsigned int),
1796                 .mode           = 0644,
1797                 .proc_handler   = sysctl_schedstats,
1798                 .extra1         = SYSCTL_ZERO,
1799                 .extra2         = SYSCTL_ONE,
1800         },
1801 #endif /* CONFIG_SCHEDSTATS */
1802 #ifdef CONFIG_TASK_DELAY_ACCT
1803         {
1804                 .procname       = "task_delayacct",
1805                 .data           = NULL,
1806                 .maxlen         = sizeof(unsigned int),
1807                 .mode           = 0644,
1808                 .proc_handler   = sysctl_delayacct,
1809                 .extra1         = SYSCTL_ZERO,
1810                 .extra2         = SYSCTL_ONE,
1811         },
1812 #endif /* CONFIG_TASK_DELAY_ACCT */
1813 #ifdef CONFIG_NUMA_BALANCING
1814         {
1815                 .procname       = "numa_balancing",
1816                 .data           = NULL, /* filled in by handler */
1817                 .maxlen         = sizeof(unsigned int),
1818                 .mode           = 0644,
1819                 .proc_handler   = sysctl_numa_balancing,
1820                 .extra1         = SYSCTL_ZERO,
1821                 .extra2         = SYSCTL_ONE,
1822         },
1823 #endif /* CONFIG_NUMA_BALANCING */
1824         {
1825                 .procname       = "sched_rt_period_us",
1826                 .data           = &sysctl_sched_rt_period,
1827                 .maxlen         = sizeof(unsigned int),
1828                 .mode           = 0644,
1829                 .proc_handler   = sched_rt_handler,
1830         },
1831         {
1832                 .procname       = "sched_rt_runtime_us",
1833                 .data           = &sysctl_sched_rt_runtime,
1834                 .maxlen         = sizeof(int),
1835                 .mode           = 0644,
1836                 .proc_handler   = sched_rt_handler,
1837         },
1838         {
1839                 .procname       = "sched_deadline_period_max_us",
1840                 .data           = &sysctl_sched_dl_period_max,
1841                 .maxlen         = sizeof(unsigned int),
1842                 .mode           = 0644,
1843                 .proc_handler   = proc_dointvec,
1844         },
1845         {
1846                 .procname       = "sched_deadline_period_min_us",
1847                 .data           = &sysctl_sched_dl_period_min,
1848                 .maxlen         = sizeof(unsigned int),
1849                 .mode           = 0644,
1850                 .proc_handler   = proc_dointvec,
1851         },
1852         {
1853                 .procname       = "sched_rr_timeslice_ms",
1854                 .data           = &sysctl_sched_rr_timeslice,
1855                 .maxlen         = sizeof(int),
1856                 .mode           = 0644,
1857                 .proc_handler   = sched_rr_handler,
1858         },
1859 #ifdef CONFIG_UCLAMP_TASK
1860         {
1861                 .procname       = "sched_util_clamp_min",
1862                 .data           = &sysctl_sched_uclamp_util_min,
1863                 .maxlen         = sizeof(unsigned int),
1864                 .mode           = 0644,
1865                 .proc_handler   = sysctl_sched_uclamp_handler,
1866         },
1867         {
1868                 .procname       = "sched_util_clamp_max",
1869                 .data           = &sysctl_sched_uclamp_util_max,
1870                 .maxlen         = sizeof(unsigned int),
1871                 .mode           = 0644,
1872                 .proc_handler   = sysctl_sched_uclamp_handler,
1873         },
1874         {
1875                 .procname       = "sched_util_clamp_min_rt_default",
1876                 .data           = &sysctl_sched_uclamp_util_min_rt_default,
1877                 .maxlen         = sizeof(unsigned int),
1878                 .mode           = 0644,
1879                 .proc_handler   = sysctl_sched_uclamp_handler,
1880         },
1881 #endif
1882 #ifdef CONFIG_SCHED_AUTOGROUP
1883         {
1884                 .procname       = "sched_autogroup_enabled",
1885                 .data           = &sysctl_sched_autogroup_enabled,
1886                 .maxlen         = sizeof(unsigned int),
1887                 .mode           = 0644,
1888                 .proc_handler   = proc_dointvec_minmax,
1889                 .extra1         = SYSCTL_ZERO,
1890                 .extra2         = SYSCTL_ONE,
1891         },
1892 #endif
1893 #ifdef CONFIG_CFS_BANDWIDTH
1894         {
1895                 .procname       = "sched_cfs_bandwidth_slice_us",
1896                 .data           = &sysctl_sched_cfs_bandwidth_slice,
1897                 .maxlen         = sizeof(unsigned int),
1898                 .mode           = 0644,
1899                 .proc_handler   = proc_dointvec_minmax,
1900                 .extra1         = SYSCTL_ONE,
1901         },
1902 #endif
1903 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
1904         {
1905                 .procname       = "sched_energy_aware",
1906                 .data           = &sysctl_sched_energy_aware,
1907                 .maxlen         = sizeof(unsigned int),
1908                 .mode           = 0644,
1909                 .proc_handler   = sched_energy_aware_handler,
1910                 .extra1         = SYSCTL_ZERO,
1911                 .extra2         = SYSCTL_ONE,
1912         },
1913 #endif
1914 #ifdef CONFIG_PROVE_LOCKING
1915         {
1916                 .procname       = "prove_locking",
1917                 .data           = &prove_locking,
1918                 .maxlen         = sizeof(int),
1919                 .mode           = 0644,
1920                 .proc_handler   = proc_dointvec,
1921         },
1922 #endif
1923 #ifdef CONFIG_LOCK_STAT
1924         {
1925                 .procname       = "lock_stat",
1926                 .data           = &lock_stat,
1927                 .maxlen         = sizeof(int),
1928                 .mode           = 0644,
1929                 .proc_handler   = proc_dointvec,
1930         },
1931 #endif
1932         {
1933                 .procname       = "panic",
1934                 .data           = &panic_timeout,
1935                 .maxlen         = sizeof(int),
1936                 .mode           = 0644,
1937                 .proc_handler   = proc_dointvec,
1938         },
1939 #ifdef CONFIG_COREDUMP
1940         {
1941                 .procname       = "core_uses_pid",
1942                 .data           = &core_uses_pid,
1943                 .maxlen         = sizeof(int),
1944                 .mode           = 0644,
1945                 .proc_handler   = proc_dointvec,
1946         },
1947         {
1948                 .procname       = "core_pattern",
1949                 .data           = core_pattern,
1950                 .maxlen         = CORENAME_MAX_SIZE,
1951                 .mode           = 0644,
1952                 .proc_handler   = proc_dostring_coredump,
1953         },
1954         {
1955                 .procname       = "core_pipe_limit",
1956                 .data           = &core_pipe_limit,
1957                 .maxlen         = sizeof(unsigned int),
1958                 .mode           = 0644,
1959                 .proc_handler   = proc_dointvec,
1960         },
1961 #endif
1962 #ifdef CONFIG_PROC_SYSCTL
1963         {
1964                 .procname       = "tainted",
1965                 .maxlen         = sizeof(long),
1966                 .mode           = 0644,
1967                 .proc_handler   = proc_taint,
1968         },
1969         {
1970                 .procname       = "sysctl_writes_strict",
1971                 .data           = &sysctl_writes_strict,
1972                 .maxlen         = sizeof(int),
1973                 .mode           = 0644,
1974                 .proc_handler   = proc_dointvec_minmax,
1975                 .extra1         = &neg_one,
1976                 .extra2         = SYSCTL_ONE,
1977         },
1978 #endif
1979 #ifdef CONFIG_LATENCYTOP
1980         {
1981                 .procname       = "latencytop",
1982                 .data           = &latencytop_enabled,
1983                 .maxlen         = sizeof(int),
1984                 .mode           = 0644,
1985                 .proc_handler   = sysctl_latencytop,
1986         },
1987 #endif
1988 #ifdef CONFIG_BLK_DEV_INITRD
1989         {
1990                 .procname       = "real-root-dev",
1991                 .data           = &real_root_dev,
1992                 .maxlen         = sizeof(int),
1993                 .mode           = 0644,
1994                 .proc_handler   = proc_dointvec,
1995         },
1996 #endif
1997         {
1998                 .procname       = "print-fatal-signals",
1999                 .data           = &print_fatal_signals,
2000                 .maxlen         = sizeof(int),
2001                 .mode           = 0644,
2002                 .proc_handler   = proc_dointvec,
2003         },
2004 #ifdef CONFIG_SPARC
2005         {
2006                 .procname       = "reboot-cmd",
2007                 .data           = reboot_command,
2008                 .maxlen         = 256,
2009                 .mode           = 0644,
2010                 .proc_handler   = proc_dostring,
2011         },
2012         {
2013                 .procname       = "stop-a",
2014                 .data           = &stop_a_enabled,
2015                 .maxlen         = sizeof (int),
2016                 .mode           = 0644,
2017                 .proc_handler   = proc_dointvec,
2018         },
2019         {
2020                 .procname       = "scons-poweroff",
2021                 .data           = &scons_pwroff,
2022                 .maxlen         = sizeof (int),
2023                 .mode           = 0644,
2024                 .proc_handler   = proc_dointvec,
2025         },
2026 #endif
2027 #ifdef CONFIG_SPARC64
2028         {
2029                 .procname       = "tsb-ratio",
2030                 .data           = &sysctl_tsb_ratio,
2031                 .maxlen         = sizeof (int),
2032                 .mode           = 0644,
2033                 .proc_handler   = proc_dointvec,
2034         },
2035 #endif
2036 #ifdef CONFIG_PARISC
2037         {
2038                 .procname       = "soft-power",
2039                 .data           = &pwrsw_enabled,
2040                 .maxlen         = sizeof (int),
2041                 .mode           = 0644,
2042                 .proc_handler   = proc_dointvec,
2043         },
2044 #endif
2045 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
2046         {
2047                 .procname       = "unaligned-trap",
2048                 .data           = &unaligned_enabled,
2049                 .maxlen         = sizeof (int),
2050                 .mode           = 0644,
2051                 .proc_handler   = proc_dointvec,
2052         },
2053 #endif
2054         {
2055                 .procname       = "ctrl-alt-del",
2056                 .data           = &C_A_D,
2057                 .maxlen         = sizeof(int),
2058                 .mode           = 0644,
2059                 .proc_handler   = proc_dointvec,
2060         },
2061 #ifdef CONFIG_FUNCTION_TRACER
2062         {
2063                 .procname       = "ftrace_enabled",
2064                 .data           = &ftrace_enabled,
2065                 .maxlen         = sizeof(int),
2066                 .mode           = 0644,
2067                 .proc_handler   = ftrace_enable_sysctl,
2068         },
2069 #endif
2070 #ifdef CONFIG_STACK_TRACER
2071         {
2072                 .procname       = "stack_tracer_enabled",
2073                 .data           = &stack_tracer_enabled,
2074                 .maxlen         = sizeof(int),
2075                 .mode           = 0644,
2076                 .proc_handler   = stack_trace_sysctl,
2077         },
2078 #endif
2079 #ifdef CONFIG_TRACING
2080         {
2081                 .procname       = "ftrace_dump_on_oops",
2082                 .data           = &ftrace_dump_on_oops,
2083                 .maxlen         = sizeof(int),
2084                 .mode           = 0644,
2085                 .proc_handler   = proc_dointvec,
2086         },
2087         {
2088                 .procname       = "traceoff_on_warning",
2089                 .data           = &__disable_trace_on_warning,
2090                 .maxlen         = sizeof(__disable_trace_on_warning),
2091                 .mode           = 0644,
2092                 .proc_handler   = proc_dointvec,
2093         },
2094         {
2095                 .procname       = "tracepoint_printk",
2096                 .data           = &tracepoint_printk,
2097                 .maxlen         = sizeof(tracepoint_printk),
2098                 .mode           = 0644,
2099                 .proc_handler   = tracepoint_printk_sysctl,
2100         },
2101 #endif
2102 #ifdef CONFIG_KEXEC_CORE
2103         {
2104                 .procname       = "kexec_load_disabled",
2105                 .data           = &kexec_load_disabled,
2106                 .maxlen         = sizeof(int),
2107                 .mode           = 0644,
2108                 /* only handle a transition from default "0" to "1" */
2109                 .proc_handler   = proc_dointvec_minmax,
2110                 .extra1         = SYSCTL_ONE,
2111                 .extra2         = SYSCTL_ONE,
2112         },
2113 #endif
2114 #ifdef CONFIG_MODULES
2115         {
2116                 .procname       = "modprobe",
2117                 .data           = &modprobe_path,
2118                 .maxlen         = KMOD_PATH_LEN,
2119                 .mode           = 0644,
2120                 .proc_handler   = proc_dostring,
2121         },
2122         {
2123                 .procname       = "modules_disabled",
2124                 .data           = &modules_disabled,
2125                 .maxlen         = sizeof(int),
2126                 .mode           = 0644,
2127                 /* only handle a transition from default "0" to "1" */
2128                 .proc_handler   = proc_dointvec_minmax,
2129                 .extra1         = SYSCTL_ONE,
2130                 .extra2         = SYSCTL_ONE,
2131         },
2132 #endif
2133 #ifdef CONFIG_UEVENT_HELPER
2134         {
2135                 .procname       = "hotplug",
2136                 .data           = &uevent_helper,
2137                 .maxlen         = UEVENT_HELPER_PATH_LEN,
2138                 .mode           = 0644,
2139                 .proc_handler   = proc_dostring,
2140         },
2141 #endif
2142 #ifdef CONFIG_CHR_DEV_SG
2143         {
2144                 .procname       = "sg-big-buff",
2145                 .data           = &sg_big_buff,
2146                 .maxlen         = sizeof (int),
2147                 .mode           = 0444,
2148                 .proc_handler   = proc_dointvec,
2149         },
2150 #endif
2151 #ifdef CONFIG_BSD_PROCESS_ACCT
2152         {
2153                 .procname       = "acct",
2154                 .data           = &acct_parm,
2155                 .maxlen         = 3*sizeof(int),
2156                 .mode           = 0644,
2157                 .proc_handler   = proc_dointvec,
2158         },
2159 #endif
2160 #ifdef CONFIG_MAGIC_SYSRQ
2161         {
2162                 .procname       = "sysrq",
2163                 .data           = NULL,
2164                 .maxlen         = sizeof (int),
2165                 .mode           = 0644,
2166                 .proc_handler   = sysrq_sysctl_handler,
2167         },
2168 #endif
2169 #ifdef CONFIG_PROC_SYSCTL
2170         {
2171                 .procname       = "cad_pid",
2172                 .data           = NULL,
2173                 .maxlen         = sizeof (int),
2174                 .mode           = 0600,
2175                 .proc_handler   = proc_do_cad_pid,
2176         },
2177 #endif
2178         {
2179                 .procname       = "threads-max",
2180                 .data           = NULL,
2181                 .maxlen         = sizeof(int),
2182                 .mode           = 0644,
2183                 .proc_handler   = sysctl_max_threads,
2184         },
2185         {
2186                 .procname       = "random",
2187                 .mode           = 0555,
2188                 .child          = random_table,
2189         },
2190         {
2191                 .procname       = "usermodehelper",
2192                 .mode           = 0555,
2193                 .child          = usermodehelper_table,
2194         },
2195 #ifdef CONFIG_FW_LOADER_USER_HELPER
2196         {
2197                 .procname       = "firmware_config",
2198                 .mode           = 0555,
2199                 .child          = firmware_config_table,
2200         },
2201 #endif
2202         {
2203                 .procname       = "overflowuid",
2204                 .data           = &overflowuid,
2205                 .maxlen         = sizeof(int),
2206                 .mode           = 0644,
2207                 .proc_handler   = proc_dointvec_minmax,
2208                 .extra1         = &minolduid,
2209                 .extra2         = &maxolduid,
2210         },
2211         {
2212                 .procname       = "overflowgid",
2213                 .data           = &overflowgid,
2214                 .maxlen         = sizeof(int),
2215                 .mode           = 0644,
2216                 .proc_handler   = proc_dointvec_minmax,
2217                 .extra1         = &minolduid,
2218                 .extra2         = &maxolduid,
2219         },
2220 #ifdef CONFIG_S390
2221         {
2222                 .procname       = "userprocess_debug",
2223                 .data           = &show_unhandled_signals,
2224                 .maxlen         = sizeof(int),
2225                 .mode           = 0644,
2226                 .proc_handler   = proc_dointvec,
2227         },
2228 #endif
2229 #ifdef CONFIG_SMP
2230         {
2231                 .procname       = "oops_all_cpu_backtrace",
2232                 .data           = &sysctl_oops_all_cpu_backtrace,
2233                 .maxlen         = sizeof(int),
2234                 .mode           = 0644,
2235                 .proc_handler   = proc_dointvec_minmax,
2236                 .extra1         = SYSCTL_ZERO,
2237                 .extra2         = SYSCTL_ONE,
2238         },
2239 #endif /* CONFIG_SMP */
2240         {
2241                 .procname       = "pid_max",
2242                 .data           = &pid_max,
2243                 .maxlen         = sizeof (int),
2244                 .mode           = 0644,
2245                 .proc_handler   = proc_dointvec_minmax,
2246                 .extra1         = &pid_max_min,
2247                 .extra2         = &pid_max_max,
2248         },
2249         {
2250                 .procname       = "panic_on_oops",
2251                 .data           = &panic_on_oops,
2252                 .maxlen         = sizeof(int),
2253                 .mode           = 0644,
2254                 .proc_handler   = proc_dointvec,
2255         },
2256         {
2257                 .procname       = "panic_print",
2258                 .data           = &panic_print,
2259                 .maxlen         = sizeof(unsigned long),
2260                 .mode           = 0644,
2261                 .proc_handler   = proc_doulongvec_minmax,
2262         },
2263 #if defined CONFIG_PRINTK
2264         {
2265                 .procname       = "printk",
2266                 .data           = &console_loglevel,
2267                 .maxlen         = 4*sizeof(int),
2268                 .mode           = 0644,
2269                 .proc_handler   = proc_dointvec,
2270         },
2271         {
2272                 .procname       = "printk_ratelimit",
2273                 .data           = &printk_ratelimit_state.interval,
2274                 .maxlen         = sizeof(int),
2275                 .mode           = 0644,
2276                 .proc_handler   = proc_dointvec_jiffies,
2277         },
2278         {
2279                 .procname       = "printk_ratelimit_burst",
2280                 .data           = &printk_ratelimit_state.burst,
2281                 .maxlen         = sizeof(int),
2282                 .mode           = 0644,
2283                 .proc_handler   = proc_dointvec,
2284         },
2285         {
2286                 .procname       = "printk_delay",
2287                 .data           = &printk_delay_msec,
2288                 .maxlen         = sizeof(int),
2289                 .mode           = 0644,
2290                 .proc_handler   = proc_dointvec_minmax,
2291                 .extra1         = SYSCTL_ZERO,
2292                 .extra2         = &ten_thousand,
2293         },
2294         {
2295                 .procname       = "printk_devkmsg",
2296                 .data           = devkmsg_log_str,
2297                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
2298                 .mode           = 0644,
2299                 .proc_handler   = devkmsg_sysctl_set_loglvl,
2300         },
2301         {
2302                 .procname       = "dmesg_restrict",
2303                 .data           = &dmesg_restrict,
2304                 .maxlen         = sizeof(int),
2305                 .mode           = 0644,
2306                 .proc_handler   = proc_dointvec_minmax_sysadmin,
2307                 .extra1         = SYSCTL_ZERO,
2308                 .extra2         = SYSCTL_ONE,
2309         },
2310         {
2311                 .procname       = "kptr_restrict",
2312                 .data           = &kptr_restrict,
2313                 .maxlen         = sizeof(int),
2314                 .mode           = 0644,
2315                 .proc_handler   = proc_dointvec_minmax_sysadmin,
2316                 .extra1         = SYSCTL_ZERO,
2317                 .extra2         = &two,
2318         },
2319 #endif
2320         {
2321                 .procname       = "ngroups_max",
2322                 .data           = &ngroups_max,
2323                 .maxlen         = sizeof (int),
2324                 .mode           = 0444,
2325                 .proc_handler   = proc_dointvec,
2326         },
2327         {
2328                 .procname       = "cap_last_cap",
2329                 .data           = (void *)&cap_last_cap,
2330                 .maxlen         = sizeof(int),
2331                 .mode           = 0444,
2332                 .proc_handler   = proc_dointvec,
2333         },
2334 #if defined(CONFIG_LOCKUP_DETECTOR)
2335         {
2336                 .procname       = "watchdog",
2337                 .data           = &watchdog_user_enabled,
2338                 .maxlen         = sizeof(int),
2339                 .mode           = 0644,
2340                 .proc_handler   = proc_watchdog,
2341                 .extra1         = SYSCTL_ZERO,
2342                 .extra2         = SYSCTL_ONE,
2343         },
2344         {
2345                 .procname       = "watchdog_thresh",
2346                 .data           = &watchdog_thresh,
2347                 .maxlen         = sizeof(int),
2348                 .mode           = 0644,
2349                 .proc_handler   = proc_watchdog_thresh,
2350                 .extra1         = SYSCTL_ZERO,
2351                 .extra2         = &sixty,
2352         },
2353         {
2354                 .procname       = "nmi_watchdog",
2355                 .data           = &nmi_watchdog_user_enabled,
2356                 .maxlen         = sizeof(int),
2357                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
2358                 .proc_handler   = proc_nmi_watchdog,
2359                 .extra1         = SYSCTL_ZERO,
2360                 .extra2         = SYSCTL_ONE,
2361         },
2362         {
2363                 .procname       = "watchdog_cpumask",
2364                 .data           = &watchdog_cpumask_bits,
2365                 .maxlen         = NR_CPUS,
2366                 .mode           = 0644,
2367                 .proc_handler   = proc_watchdog_cpumask,
2368         },
2369 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
2370         {
2371                 .procname       = "soft_watchdog",
2372                 .data           = &soft_watchdog_user_enabled,
2373                 .maxlen         = sizeof(int),
2374                 .mode           = 0644,
2375                 .proc_handler   = proc_soft_watchdog,
2376                 .extra1         = SYSCTL_ZERO,
2377                 .extra2         = SYSCTL_ONE,
2378         },
2379         {
2380                 .procname       = "softlockup_panic",
2381                 .data           = &softlockup_panic,
2382                 .maxlen         = sizeof(int),
2383                 .mode           = 0644,
2384                 .proc_handler   = proc_dointvec_minmax,
2385                 .extra1         = SYSCTL_ZERO,
2386                 .extra2         = SYSCTL_ONE,
2387         },
2388 #ifdef CONFIG_SMP
2389         {
2390                 .procname       = "softlockup_all_cpu_backtrace",
2391                 .data           = &sysctl_softlockup_all_cpu_backtrace,
2392                 .maxlen         = sizeof(int),
2393                 .mode           = 0644,
2394                 .proc_handler   = proc_dointvec_minmax,
2395                 .extra1         = SYSCTL_ZERO,
2396                 .extra2         = SYSCTL_ONE,
2397         },
2398 #endif /* CONFIG_SMP */
2399 #endif
2400 #ifdef CONFIG_HARDLOCKUP_DETECTOR
2401         {
2402                 .procname       = "hardlockup_panic",
2403                 .data           = &hardlockup_panic,
2404                 .maxlen         = sizeof(int),
2405                 .mode           = 0644,
2406                 .proc_handler   = proc_dointvec_minmax,
2407                 .extra1         = SYSCTL_ZERO,
2408                 .extra2         = SYSCTL_ONE,
2409         },
2410 #ifdef CONFIG_SMP
2411         {
2412                 .procname       = "hardlockup_all_cpu_backtrace",
2413                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
2414                 .maxlen         = sizeof(int),
2415                 .mode           = 0644,
2416                 .proc_handler   = proc_dointvec_minmax,
2417                 .extra1         = SYSCTL_ZERO,
2418                 .extra2         = SYSCTL_ONE,
2419         },
2420 #endif /* CONFIG_SMP */
2421 #endif
2422 #endif
2423
2424 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
2425         {
2426                 .procname       = "unknown_nmi_panic",
2427                 .data           = &unknown_nmi_panic,
2428                 .maxlen         = sizeof (int),
2429                 .mode           = 0644,
2430                 .proc_handler   = proc_dointvec,
2431         },
2432 #endif
2433
2434 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
2435         defined(CONFIG_DEBUG_STACKOVERFLOW)
2436         {
2437                 .procname       = "panic_on_stackoverflow",
2438                 .data           = &sysctl_panic_on_stackoverflow,
2439                 .maxlen         = sizeof(int),
2440                 .mode           = 0644,
2441                 .proc_handler   = proc_dointvec,
2442         },
2443 #endif
2444 #if defined(CONFIG_X86)
2445         {
2446                 .procname       = "panic_on_unrecovered_nmi",
2447                 .data           = &panic_on_unrecovered_nmi,
2448                 .maxlen         = sizeof(int),
2449                 .mode           = 0644,
2450                 .proc_handler   = proc_dointvec,
2451         },
2452         {
2453                 .procname       = "panic_on_io_nmi",
2454                 .data           = &panic_on_io_nmi,
2455                 .maxlen         = sizeof(int),
2456                 .mode           = 0644,
2457                 .proc_handler   = proc_dointvec,
2458         },
2459         {
2460                 .procname       = "bootloader_type",
2461                 .data           = &bootloader_type,
2462                 .maxlen         = sizeof (int),
2463                 .mode           = 0444,
2464                 .proc_handler   = proc_dointvec,
2465         },
2466         {
2467                 .procname       = "bootloader_version",
2468                 .data           = &bootloader_version,
2469                 .maxlen         = sizeof (int),
2470                 .mode           = 0444,
2471                 .proc_handler   = proc_dointvec,
2472         },
2473         {
2474                 .procname       = "io_delay_type",
2475                 .data           = &io_delay_type,
2476                 .maxlen         = sizeof(int),
2477                 .mode           = 0644,
2478                 .proc_handler   = proc_dointvec,
2479         },
2480 #endif
2481 #if defined(CONFIG_MMU)
2482         {
2483                 .procname       = "randomize_va_space",
2484                 .data           = &randomize_va_space,
2485                 .maxlen         = sizeof(int),
2486                 .mode           = 0644,
2487                 .proc_handler   = proc_dointvec,
2488         },
2489 #endif
2490 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
2491         {
2492                 .procname       = "spin_retry",
2493                 .data           = &spin_retry,
2494                 .maxlen         = sizeof (int),
2495                 .mode           = 0644,
2496                 .proc_handler   = proc_dointvec,
2497         },
2498 #endif
2499 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
2500         {
2501                 .procname       = "acpi_video_flags",
2502                 .data           = &acpi_realmode_flags,
2503                 .maxlen         = sizeof (unsigned long),
2504                 .mode           = 0644,
2505                 .proc_handler   = proc_doulongvec_minmax,
2506         },
2507 #endif
2508 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
2509         {
2510                 .procname       = "ignore-unaligned-usertrap",
2511                 .data           = &no_unaligned_warning,
2512                 .maxlen         = sizeof (int),
2513                 .mode           = 0644,
2514                 .proc_handler   = proc_dointvec,
2515         },
2516 #endif
2517 #ifdef CONFIG_IA64
2518         {
2519                 .procname       = "unaligned-dump-stack",
2520                 .data           = &unaligned_dump_stack,
2521                 .maxlen         = sizeof (int),
2522                 .mode           = 0644,
2523                 .proc_handler   = proc_dointvec,
2524         },
2525 #endif
2526 #ifdef CONFIG_DETECT_HUNG_TASK
2527 #ifdef CONFIG_SMP
2528         {
2529                 .procname       = "hung_task_all_cpu_backtrace",
2530                 .data           = &sysctl_hung_task_all_cpu_backtrace,
2531                 .maxlen         = sizeof(int),
2532                 .mode           = 0644,
2533                 .proc_handler   = proc_dointvec_minmax,
2534                 .extra1         = SYSCTL_ZERO,
2535                 .extra2         = SYSCTL_ONE,
2536         },
2537 #endif /* CONFIG_SMP */
2538         {
2539                 .procname       = "hung_task_panic",
2540                 .data           = &sysctl_hung_task_panic,
2541                 .maxlen         = sizeof(int),
2542                 .mode           = 0644,
2543                 .proc_handler   = proc_dointvec_minmax,
2544                 .extra1         = SYSCTL_ZERO,
2545                 .extra2         = SYSCTL_ONE,
2546         },
2547         {
2548                 .procname       = "hung_task_check_count",
2549                 .data           = &sysctl_hung_task_check_count,
2550                 .maxlen         = sizeof(int),
2551                 .mode           = 0644,
2552                 .proc_handler   = proc_dointvec_minmax,
2553                 .extra1         = SYSCTL_ZERO,
2554         },
2555         {
2556                 .procname       = "hung_task_timeout_secs",
2557                 .data           = &sysctl_hung_task_timeout_secs,
2558                 .maxlen         = sizeof(unsigned long),
2559                 .mode           = 0644,
2560                 .proc_handler   = proc_dohung_task_timeout_secs,
2561                 .extra2         = &hung_task_timeout_max,
2562         },
2563         {
2564                 .procname       = "hung_task_check_interval_secs",
2565                 .data           = &sysctl_hung_task_check_interval_secs,
2566                 .maxlen         = sizeof(unsigned long),
2567                 .mode           = 0644,
2568                 .proc_handler   = proc_dohung_task_timeout_secs,
2569                 .extra2         = &hung_task_timeout_max,
2570         },
2571         {
2572                 .procname       = "hung_task_warnings",
2573                 .data           = &sysctl_hung_task_warnings,
2574                 .maxlen         = sizeof(int),
2575                 .mode           = 0644,
2576                 .proc_handler   = proc_dointvec_minmax,
2577                 .extra1         = &neg_one,
2578         },
2579 #endif
2580 #ifdef CONFIG_RT_MUTEXES
2581         {
2582                 .procname       = "max_lock_depth",
2583                 .data           = &max_lock_depth,
2584                 .maxlen         = sizeof(int),
2585                 .mode           = 0644,
2586                 .proc_handler   = proc_dointvec,
2587         },
2588 #endif
2589         {
2590                 .procname       = "poweroff_cmd",
2591                 .data           = &poweroff_cmd,
2592                 .maxlen         = POWEROFF_CMD_PATH_LEN,
2593                 .mode           = 0644,
2594                 .proc_handler   = proc_dostring,
2595         },
2596 #ifdef CONFIG_KEYS
2597         {
2598                 .procname       = "keys",
2599                 .mode           = 0555,
2600                 .child          = key_sysctls,
2601         },
2602 #endif
2603 #ifdef CONFIG_PERF_EVENTS
2604         /*
2605          * User-space scripts rely on the existence of this file
2606          * as a feature check for perf_events being enabled.
2607          *
2608          * So it's an ABI, do not remove!
2609          */
2610         {
2611                 .procname       = "perf_event_paranoid",
2612                 .data           = &sysctl_perf_event_paranoid,
2613                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
2614                 .mode           = 0644,
2615                 .proc_handler   = proc_dointvec,
2616         },
2617         {
2618                 .procname       = "perf_event_mlock_kb",
2619                 .data           = &sysctl_perf_event_mlock,
2620                 .maxlen         = sizeof(sysctl_perf_event_mlock),
2621                 .mode           = 0644,
2622                 .proc_handler   = proc_dointvec,
2623         },
2624         {
2625                 .procname       = "perf_event_max_sample_rate",
2626                 .data           = &sysctl_perf_event_sample_rate,
2627                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
2628                 .mode           = 0644,
2629                 .proc_handler   = perf_proc_update_handler,
2630                 .extra1         = SYSCTL_ONE,
2631         },
2632         {
2633                 .procname       = "perf_cpu_time_max_percent",
2634                 .data           = &sysctl_perf_cpu_time_max_percent,
2635                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
2636                 .mode           = 0644,
2637                 .proc_handler   = perf_cpu_time_max_percent_handler,
2638                 .extra1         = SYSCTL_ZERO,
2639                 .extra2         = &one_hundred,
2640         },
2641         {
2642                 .procname       = "perf_event_max_stack",
2643                 .data           = &sysctl_perf_event_max_stack,
2644                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
2645                 .mode           = 0644,
2646                 .proc_handler   = perf_event_max_stack_handler,
2647                 .extra1         = SYSCTL_ZERO,
2648                 .extra2         = &six_hundred_forty_kb,
2649         },
2650         {
2651                 .procname       = "perf_event_max_contexts_per_stack",
2652                 .data           = &sysctl_perf_event_max_contexts_per_stack,
2653                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
2654                 .mode           = 0644,
2655                 .proc_handler   = perf_event_max_stack_handler,
2656                 .extra1         = SYSCTL_ZERO,
2657                 .extra2         = &one_thousand,
2658         },
2659 #endif
2660         {
2661                 .procname       = "panic_on_warn",
2662                 .data           = &panic_on_warn,
2663                 .maxlen         = sizeof(int),
2664                 .mode           = 0644,
2665                 .proc_handler   = proc_dointvec_minmax,
2666                 .extra1         = SYSCTL_ZERO,
2667                 .extra2         = SYSCTL_ONE,
2668         },
2669 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
2670         {
2671                 .procname       = "timer_migration",
2672                 .data           = &sysctl_timer_migration,
2673                 .maxlen         = sizeof(unsigned int),
2674                 .mode           = 0644,
2675                 .proc_handler   = timer_migration_handler,
2676                 .extra1         = SYSCTL_ZERO,
2677                 .extra2         = SYSCTL_ONE,
2678         },
2679 #endif
2680 #ifdef CONFIG_BPF_SYSCALL
2681         {
2682                 .procname       = "unprivileged_bpf_disabled",
2683                 .data           = &sysctl_unprivileged_bpf_disabled,
2684                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
2685                 .mode           = 0644,
2686                 .proc_handler   = bpf_unpriv_handler,
2687                 .extra1         = SYSCTL_ZERO,
2688                 .extra2         = &two,
2689         },
2690         {
2691                 .procname       = "bpf_stats_enabled",
2692                 .data           = &bpf_stats_enabled_key.key,
2693                 .maxlen         = sizeof(bpf_stats_enabled_key),
2694                 .mode           = 0644,
2695                 .proc_handler   = bpf_stats_handler,
2696         },
2697 #endif
2698 #if defined(CONFIG_TREE_RCU)
2699         {
2700                 .procname       = "panic_on_rcu_stall",
2701                 .data           = &sysctl_panic_on_rcu_stall,
2702                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
2703                 .mode           = 0644,
2704                 .proc_handler   = proc_dointvec_minmax,
2705                 .extra1         = SYSCTL_ZERO,
2706                 .extra2         = SYSCTL_ONE,
2707         },
2708 #endif
2709 #if defined(CONFIG_TREE_RCU)
2710         {
2711                 .procname       = "max_rcu_stall_to_panic",
2712                 .data           = &sysctl_max_rcu_stall_to_panic,
2713                 .maxlen         = sizeof(sysctl_max_rcu_stall_to_panic),
2714                 .mode           = 0644,
2715                 .proc_handler   = proc_dointvec_minmax,
2716                 .extra1         = SYSCTL_ONE,
2717                 .extra2         = SYSCTL_INT_MAX,
2718         },
2719 #endif
2720 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
2721         {
2722                 .procname       = "stack_erasing",
2723                 .data           = NULL,
2724                 .maxlen         = sizeof(int),
2725                 .mode           = 0600,
2726                 .proc_handler   = stack_erasing_sysctl,
2727                 .extra1         = SYSCTL_ZERO,
2728                 .extra2         = SYSCTL_ONE,
2729         },
2730 #endif
2731         { }
2732 };
2733
/*
 * vm_table - sysctl entries for the memory-management tunables.
 *
 * Each entry names the file (.procname), the variable that backs it (.data)
 * together with its size (.maxlen), the permission bits of the file (.mode)
 * and the handler that reads/writes the value (.proc_handler).  Where set,
 * .extra1/.extra2 are by convention the lower/upper bound consumed by the
 * *_minmax handlers; how the custom handlers named below use them is not
 * visible here.
 *
 * Entries wrapped in #ifdef/#if exist only when the corresponding config
 * option is enabled.  The array is closed by an empty entry.
 *
 * NOTE(review): the registration of this table (presumably as /proc/sys/vm)
 * is outside this view -- confirm.
 */
2734 static struct ctl_table vm_table[] = {
             /* --- overcommit and OOM policy --- */
2735         {
2736                 .procname       = "overcommit_memory",
2737                 .data           = &sysctl_overcommit_memory,
2738                 .maxlen         = sizeof(sysctl_overcommit_memory),
2739                 .mode           = 0644,
2740                 .proc_handler   = overcommit_policy_handler,
2741                 .extra1         = SYSCTL_ZERO,
2742                 .extra2         = &two,
2743         },
2744         {
2745                 .procname       = "panic_on_oom",
2746                 .data           = &sysctl_panic_on_oom,
2747                 .maxlen         = sizeof(sysctl_panic_on_oom),
2748                 .mode           = 0644,
2749                 .proc_handler   = proc_dointvec_minmax,
2750                 .extra1         = SYSCTL_ZERO,
2751                 .extra2         = &two,
2752         },
2753         {
2754                 .procname       = "oom_kill_allocating_task",
2755                 .data           = &sysctl_oom_kill_allocating_task,
2756                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
2757                 .mode           = 0644,
2758                 .proc_handler   = proc_dointvec,
2759         },
2760         {
2761                 .procname       = "oom_dump_tasks",
2762                 .data           = &sysctl_oom_dump_tasks,
2763                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
2764                 .mode           = 0644,
2765                 .proc_handler   = proc_dointvec,
2766         },
2767         {
2768                 .procname       = "overcommit_ratio",
2769                 .data           = &sysctl_overcommit_ratio,
2770                 .maxlen         = sizeof(sysctl_overcommit_ratio),
2771                 .mode           = 0644,
2772                 .proc_handler   = overcommit_ratio_handler,
2773         },
2774         {
2775                 .procname       = "overcommit_kbytes",
2776                 .data           = &sysctl_overcommit_kbytes,
2777                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
2778                 .mode           = 0644,
2779                 .proc_handler   = overcommit_kbytes_handler,
2780         },
             /* Lower bound only: no .extra2 is given for page-cluster. */
2781         {
2782                 .procname       = "page-cluster",
2783                 .data           = &page_cluster,
2784                 .maxlen         = sizeof(int),
2785                 .mode           = 0644,
2786                 .proc_handler   = proc_dointvec_minmax,
2787                 .extra1         = SYSCTL_ZERO,
2788         },
             /*
              * --- dirty page writeback ---
              * The background and foreground limits each come as a *_ratio
              * entry and a *_bytes entry, every one with its own handler.
              */
2789         {
2790                 .procname       = "dirty_background_ratio",
2791                 .data           = &dirty_background_ratio,
2792                 .maxlen         = sizeof(dirty_background_ratio),
2793                 .mode           = 0644,
2794                 .proc_handler   = dirty_background_ratio_handler,
2795                 .extra1         = SYSCTL_ZERO,
2796                 .extra2         = &one_hundred,
2797         },
2798         {
2799                 .procname       = "dirty_background_bytes",
2800                 .data           = &dirty_background_bytes,
2801                 .maxlen         = sizeof(dirty_background_bytes),
2802                 .mode           = 0644,
2803                 .proc_handler   = dirty_background_bytes_handler,
2804                 .extra1         = &one_ul,
2805         },
2806         {
2807                 .procname       = "dirty_ratio",
2808                 .data           = &vm_dirty_ratio,
2809                 .maxlen         = sizeof(vm_dirty_ratio),
2810                 .mode           = 0644,
2811                 .proc_handler   = dirty_ratio_handler,
2812                 .extra1         = SYSCTL_ZERO,
2813                 .extra2         = &one_hundred,
2814         },
2815         {
2816                 .procname       = "dirty_bytes",
2817                 .data           = &vm_dirty_bytes,
2818                 .maxlen         = sizeof(vm_dirty_bytes),
2819                 .mode           = 0644,
2820                 .proc_handler   = dirty_bytes_handler,
2821                 .extra1         = &dirty_bytes_min,
2822         },
2823         {
2824                 .procname       = "dirty_writeback_centisecs",
2825                 .data           = &dirty_writeback_interval,
2826                 .maxlen         = sizeof(dirty_writeback_interval),
2827                 .mode           = 0644,
2828                 .proc_handler   = dirty_writeback_centisecs_handler,
2829         },
2830         {
2831                 .procname       = "dirty_expire_centisecs",
2832                 .data           = &dirty_expire_interval,
2833                 .maxlen         = sizeof(dirty_expire_interval),
2834                 .mode           = 0644,
2835                 .proc_handler   = proc_dointvec_minmax,
2836                 .extra1         = SYSCTL_ZERO,
2837         },
2838         {
2839                 .procname       = "dirtytime_expire_seconds",
2840                 .data           = &dirtytime_expire_interval,
2841                 .maxlen         = sizeof(dirtytime_expire_interval),
2842                 .mode           = 0644,
2843                 .proc_handler   = dirtytime_interval_handler,
2844                 .extra1         = SYSCTL_ZERO,
2845         },
2846         {
2847                 .procname       = "swappiness",
2848                 .data           = &vm_swappiness,
2849                 .maxlen         = sizeof(vm_swappiness),
2850                 .mode           = 0644,
2851                 .proc_handler   = proc_dointvec_minmax,
2852                 .extra1         = SYSCTL_ZERO,
2853                 .extra2         = &two_hundred,
2854         },
     /*
      * --- hugetlb ---
      * The whole group depends on CONFIG_HUGETLB_PAGE; the two entries inside
      * the nested CONFIG_NUMA guard additionally require NUMA.
      */
2855 #ifdef CONFIG_HUGETLB_PAGE
             /*
              * .data is NULL: the table names no backing variable here.
              * NOTE(review): the handler presumably locates its own
              * storage -- confirm.
              */
2856         {
2857                 .procname       = "nr_hugepages",
2858                 .data           = NULL,
2859                 .maxlen         = sizeof(unsigned long),
2860                 .mode           = 0644,
2861                 .proc_handler   = hugetlb_sysctl_handler,
2862         },
2863 #ifdef CONFIG_NUMA
2864         {
2865                 .procname       = "nr_hugepages_mempolicy",
2866                 .data           = NULL,
2867                 .maxlen         = sizeof(unsigned long),
2868                 .mode           = 0644,
2869                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2870         },
2871         {
2872                 .procname               = "numa_stat",
2873                 .data                   = &sysctl_vm_numa_stat,
2874                 .maxlen                 = sizeof(int),
2875                 .mode                   = 0644,
2876                 .proc_handler   = sysctl_vm_numa_stat_handler,
2877                 .extra1                 = SYSCTL_ZERO,
2878                 .extra2                 = SYSCTL_ONE,
2879         },
     /* closes the nested CONFIG_NUMA guard; still inside CONFIG_HUGETLB_PAGE */
2880 #endif
             /*
              * Sized as gid_t but parsed by proc_dointvec.
              * NOTE(review): this presumably relies on gid_t being
              * int-sized -- confirm.
              */
2881          {
2882                 .procname       = "hugetlb_shm_group",
2883                 .data           = &sysctl_hugetlb_shm_group,
2884                 .maxlen         = sizeof(gid_t),
2885                 .mode           = 0644,
2886                 .proc_handler   = proc_dointvec,
2887          },
2888         {
2889                 .procname       = "nr_overcommit_hugepages",
2890                 .data           = NULL,
2891                 .maxlen         = sizeof(unsigned long),
2892                 .mode           = 0644,
2893                 .proc_handler   = hugetlb_overcommit_handler,
2894         },
     /* closes CONFIG_HUGETLB_PAGE */
2895 #endif
2896         {
2897                 .procname       = "lowmem_reserve_ratio",
2898                 .data           = &sysctl_lowmem_reserve_ratio,
2899                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
2900                 .mode           = 0644,
2901                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
2902         },
             /* Mode 0200: writable by the owner only; the file has no read bit. */
2903         {
2904                 .procname       = "drop_caches",
2905                 .data           = &sysctl_drop_caches,
2906                 .maxlen         = sizeof(int),
2907                 .mode           = 0200,
2908                 .proc_handler   = drop_caches_sysctl_handler,
2909                 .extra1         = SYSCTL_ONE,
2910                 .extra2         = &four,
2911         },
     /* --- memory compaction --- */
2912 #ifdef CONFIG_COMPACTION
             /*
              * Write-only (0200) entry with no backing variable (.data is
              * NULL); whatever happens on write is up to the handler.
              */
2913         {
2914                 .procname       = "compact_memory",
2915                 .data           = NULL,
2916                 .maxlen         = sizeof(int),
2917                 .mode           = 0200,
2918                 .proc_handler   = sysctl_compaction_handler,
2919         },
2920         {
2921                 .procname       = "compaction_proactiveness",
2922                 .data           = &sysctl_compaction_proactiveness,
2923                 .maxlen         = sizeof(sysctl_compaction_proactiveness),
2924                 .mode           = 0644,
2925                 .proc_handler   = compaction_proactiveness_sysctl_handler,
2926                 .extra1         = SYSCTL_ZERO,
2927                 .extra2         = &one_hundred,
2928         },
2929         {
2930                 .procname       = "extfrag_threshold",
2931                 .data           = &sysctl_extfrag_threshold,
2932                 .maxlen         = sizeof(int),
2933                 .mode           = 0644,
2934                 .proc_handler   = proc_dointvec_minmax,
2935                 .extra1         = &min_extfrag_threshold,
2936                 .extra2         = &max_extfrag_threshold,
2937         },
2938         {
2939                 .procname       = "compact_unevictable_allowed",
2940                 .data           = &sysctl_compact_unevictable_allowed,
2941                 .maxlen         = sizeof(int),
2942                 .mode           = 0644,
2943                 .proc_handler   = proc_dointvec_minmax_warn_RT_change,
2944                 .extra1         = SYSCTL_ZERO,
2945                 .extra2         = SYSCTL_ONE,
2946         },
2947
2948 #endif /* CONFIG_COMPACTION */
             /* --- free-page watermarks and per-cpu page lists --- */
2949         {
2950                 .procname       = "min_free_kbytes",
2951                 .data           = &min_free_kbytes,
2952                 .maxlen         = sizeof(min_free_kbytes),
2953                 .mode           = 0644,
2954                 .proc_handler   = min_free_kbytes_sysctl_handler,
2955                 .extra1         = SYSCTL_ZERO,
2956         },
2957         {
2958                 .procname       = "watermark_boost_factor",
2959                 .data           = &watermark_boost_factor,
2960                 .maxlen         = sizeof(watermark_boost_factor),
2961                 .mode           = 0644,
2962                 .proc_handler   = proc_dointvec_minmax,
2963                 .extra1         = SYSCTL_ZERO,
2964         },
2965         {
2966                 .procname       = "watermark_scale_factor",
2967                 .data           = &watermark_scale_factor,
2968                 .maxlen         = sizeof(watermark_scale_factor),
2969                 .mode           = 0644,
2970                 .proc_handler   = watermark_scale_factor_sysctl_handler,
2971                 .extra1         = SYSCTL_ONE,
2972                 .extra2         = &one_thousand,
2973         },
2974         {
2975                 .procname       = "percpu_pagelist_high_fraction",
2976                 .data           = &percpu_pagelist_high_fraction,
2977                 .maxlen         = sizeof(percpu_pagelist_high_fraction),
2978                 .mode           = 0644,
2979                 .proc_handler   = percpu_pagelist_high_fraction_sysctl_handler,
2980                 .extra1         = SYSCTL_ZERO,
2981         },
2982         {
2983                 .procname       = "page_lock_unfairness",
2984                 .data           = &sysctl_page_lock_unfairness,
2985                 .maxlen         = sizeof(sysctl_page_lock_unfairness),
2986                 .mode           = 0644,
2987                 .proc_handler   = proc_dointvec_minmax,
2988                 .extra1         = SYSCTL_ZERO,
2989         },
     /*
      * Exactly one of the next two entries is built: max_map_count when
      * CONFIG_MMU is set, nr_trim_pages otherwise.
      */
2990 #ifdef CONFIG_MMU
2991         {
2992                 .procname       = "max_map_count",
2993                 .data           = &sysctl_max_map_count,
2994                 .maxlen         = sizeof(sysctl_max_map_count),
2995                 .mode           = 0644,
2996                 .proc_handler   = proc_dointvec_minmax,
2997                 .extra1         = SYSCTL_ZERO,
2998         },
2999 #else
3000         {
3001                 .procname       = "nr_trim_pages",
3002                 .data           = &sysctl_nr_trim_pages,
3003                 .maxlen         = sizeof(sysctl_nr_trim_pages),
3004                 .mode           = 0644,
3005                 .proc_handler   = proc_dointvec_minmax,
3006                 .extra1         = SYSCTL_ZERO,
3007         },
3008 #endif
3009         {
3010                 .procname       = "laptop_mode",
3011                 .data           = &laptop_mode,
3012                 .maxlen         = sizeof(laptop_mode),
3013                 .mode           = 0644,
3014                 .proc_handler   = proc_dointvec_jiffies,
3015         },
3016         {
3017                 .procname       = "vfs_cache_pressure",
3018                 .data           = &sysctl_vfs_cache_pressure,
3019                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
3020                 .mode           = 0644,
3021                 .proc_handler   = proc_dointvec_minmax,
3022                 .extra1         = SYSCTL_ZERO,
3023         },
3024 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
3025     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
3026         {
3027                 .procname       = "legacy_va_layout",
3028                 .data           = &sysctl_legacy_va_layout,
3029                 .maxlen         = sizeof(sysctl_legacy_va_layout),
3030                 .mode           = 0644,
3031                 .proc_handler   = proc_dointvec_minmax,
3032                 .extra1         = SYSCTL_ZERO,
3033         },
3034 #endif
     /* --- NUMA node reclaim (the procname says "zone", the variable "node") --- */
3035 #ifdef CONFIG_NUMA
3036         {
3037                 .procname       = "zone_reclaim_mode",
3038                 .data           = &node_reclaim_mode,
3039                 .maxlen         = sizeof(node_reclaim_mode),
3040                 .mode           = 0644,
3041                 .proc_handler   = proc_dointvec_minmax,
3042                 .extra1         = SYSCTL_ZERO,
3043         },
3044         {
3045                 .procname       = "min_unmapped_ratio",
3046                 .data           = &sysctl_min_unmapped_ratio,
3047                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
3048                 .mode           = 0644,
3049                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
3050                 .extra1         = SYSCTL_ZERO,
3051                 .extra2         = &one_hundred,
3052         },
3053         {
3054                 .procname       = "min_slab_ratio",
3055                 .data           = &sysctl_min_slab_ratio,
3056                 .maxlen         = sizeof(sysctl_min_slab_ratio),
3057                 .mode           = 0644,
3058                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
3059                 .extra1         = SYSCTL_ZERO,
3060                 .extra2         = &one_hundred,
3061         },
3062 #endif
3063 #ifdef CONFIG_SMP
3064         {
3065                 .procname       = "stat_interval",
3066                 .data           = &sysctl_stat_interval,
3067                 .maxlen         = sizeof(sysctl_stat_interval),
3068                 .mode           = 0644,
3069                 .proc_handler   = proc_dointvec_jiffies,
3070         },
             /*
              * No backing data at all (.data NULL, .maxlen 0) and mode 0600,
              * so only the owner may access it; all work is done by the
              * vmstat_refresh handler.
              */
3071         {
3072                 .procname       = "stat_refresh",
3073                 .data           = NULL,
3074                 .maxlen         = 0,
3075                 .mode           = 0600,
3076                 .proc_handler   = vmstat_refresh,
3077         },
3078 #endif
3079 #ifdef CONFIG_MMU
3080         {
3081                 .procname       = "mmap_min_addr",
3082                 .data           = &dac_mmap_min_addr,
3083                 .maxlen         = sizeof(unsigned long),
3084                 .mode           = 0644,
3085                 .proc_handler   = mmap_min_addr_handler,
3086         },
3087 #endif
3088 #ifdef CONFIG_NUMA
3089         {
3090                 .procname       = "numa_zonelist_order",
3091                 .data           = &numa_zonelist_order,
3092                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
3093                 .mode           = 0644,
3094                 .proc_handler   = numa_zonelist_order_handler,
3095         },
3096 #endif
     /*
      * vdso_enabled: built for 32-bit x86 (outside UML) or for SuperH with
      * vsyscall support.  The backing variable is vdso32_enabled on X86_32
      * and vdso_enabled otherwise.
      *
      * NOTE(review): .extra1 is set although the handler is plain
      * proc_dointvec rather than a *_minmax variant -- confirm whether the
      * lower bound is actually enforced.
      */
3097 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
3098    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
3099         {
3100                 .procname       = "vdso_enabled",
3101 #ifdef CONFIG_X86_32
3102                 .data           = &vdso32_enabled,
3103                 .maxlen         = sizeof(vdso32_enabled),
3104 #else
3105                 .data           = &vdso_enabled,
3106                 .maxlen         = sizeof(vdso_enabled),
3107 #endif
3108                 .mode           = 0644,
3109                 .proc_handler   = proc_dointvec,
3110                 .extra1         = SYSCTL_ZERO,
3111         },
3112 #endif
3113 #ifdef CONFIG_HIGHMEM
3114         {
3115                 .procname       = "highmem_is_dirtyable",
3116                 .data           = &vm_highmem_is_dirtyable,
3117                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
3118                 .mode           = 0644,
3119                 .proc_handler   = proc_dointvec_minmax,
3120                 .extra1         = SYSCTL_ZERO,
3121                 .extra2         = SYSCTL_ONE,
3122         },
3123 #endif
3124 #ifdef CONFIG_MEMORY_FAILURE
3125         {
3126                 .procname       = "memory_failure_early_kill",
3127                 .data           = &sysctl_memory_failure_early_kill,
3128                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
3129                 .mode           = 0644,
3130                 .proc_handler   = proc_dointvec_minmax,
3131                 .extra1         = SYSCTL_ZERO,
3132                 .extra2         = SYSCTL_ONE,
3133         },
3134         {
3135                 .procname       = "memory_failure_recovery",
3136                 .data           = &sysctl_memory_failure_recovery,
3137                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
3138                 .mode           = 0644,
3139                 .proc_handler   = proc_dointvec_minmax,
3140                 .extra1         = SYSCTL_ZERO,
3141                 .extra2         = SYSCTL_ONE,
3142         },
3143 #endif
             /* Both reserves are unsigned long values with no bounds supplied. */
3144         {
3145                 .procname       = "user_reserve_kbytes",
3146                 .data           = &sysctl_user_reserve_kbytes,
3147                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
3148                 .mode           = 0644,
3149                 .proc_handler   = proc_doulongvec_minmax,
3150         },
3151         {
3152                 .procname       = "admin_reserve_kbytes",
3153                 .data           = &sysctl_admin_reserve_kbytes,
3154                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
3155                 .mode           = 0644,
3156                 .proc_handler   = proc_doulongvec_minmax,
3157         },
     /*
      * mmap randomisation bits: mode 0600, so owner-only for both read and
      * write.
      * NOTE(review): the (void *) casts on the bounds presumably strip a
      * const qualifier from the *_min/*_max objects -- confirm.
      */
3158 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
3159         {
3160                 .procname       = "mmap_rnd_bits",
3161                 .data           = &mmap_rnd_bits,
3162                 .maxlen         = sizeof(mmap_rnd_bits),
3163                 .mode           = 0600,
3164                 .proc_handler   = proc_dointvec_minmax,
3165                 .extra1         = (void *)&mmap_rnd_bits_min,
3166                 .extra2         = (void *)&mmap_rnd_bits_max,
3167         },
3168 #endif
3169 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
3170         {
3171                 .procname       = "mmap_rnd_compat_bits",
3172                 .data           = &mmap_rnd_compat_bits,
3173                 .maxlen         = sizeof(mmap_rnd_compat_bits),
3174                 .mode           = 0600,
3175                 .proc_handler   = proc_dointvec_minmax,
3176                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
3177                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
3178         },
3179 #endif
3180 #ifdef CONFIG_USERFAULTFD
3181         {
3182                 .procname       = "unprivileged_userfaultfd",
3183                 .data           = &sysctl_unprivileged_userfaultfd,
3184                 .maxlen         = sizeof(sysctl_unprivileged_userfaultfd),
3185                 .mode           = 0644,
3186                 .proc_handler   = proc_dointvec_minmax,
3187                 .extra1         = SYSCTL_ZERO,
3188                 .extra2         = SYSCTL_ONE,
3189         },
3190 #endif
             /* Empty entry: marks the end of the table. */
3191         { }
3192 };
3193
3194 static struct ctl_table fs_table[] = {
3195         {
3196                 .procname       = "inode-nr",
3197                 .data           = &inodes_stat,
3198                 .maxlen         = 2*sizeof(long),
3199                 .mode           = 0444,
3200                 .proc_handler   = proc_nr_inodes,
3201         },
3202         {
3203                 .procname       = "inode-state",
3204                 .data           = &inodes_stat,
3205                 .maxlen         = 7*sizeof(long),
3206                 .mode           = 0444,
3207                 .proc_handler   = proc_nr_inodes,
3208         },
3209         {
3210                 .procname       = "file-nr",
3211                 .data           = &files_stat,
3212                 .maxlen         = sizeof(files_stat),
3213                 .mode           = 0444,
3214                 .proc_handler   = proc_nr_files,
3215         },
3216         {
3217                 .procname       = "file-max",
3218                 .data           = &files_stat.max_files,
3219                 .maxlen         = sizeof(files_stat.max_files),
3220                 .mode           = 0644,
3221                 .proc_handler   = proc_doulongvec_minmax,
3222                 .extra1         = &zero_ul,
3223                 .extra2         = &long_max,
3224         },
3225         {
3226                 .procname       = "nr_open",
3227                 .data           = &sysctl_nr_open,
3228                 .maxlen         = sizeof(unsigned int),
3229                 .mode           = 0644,
3230                 .proc_handler   = proc_dointvec_minmax,
3231                 .extra1         = &sysctl_nr_open_min,
3232                 .extra2         = &sysctl_nr_open_max,
3233         },
3234         {
3235                 .procname       = "dentry-state",
3236                 .data           = &dentry_stat,
3237                 .maxlen         = 6*sizeof(long),
3238                 .mode           = 0444,
3239                 .proc_handler   = proc_nr_dentry,
3240         },
3241         {
3242                 .procname       = "overflowuid",
3243                 .data           = &fs_overflowuid,
3244                 .maxlen         = sizeof(int),
3245                 .mode           = 0644,
3246                 .proc_handler   = proc_dointvec_minmax,
3247                 .extra1         = &minolduid,
3248                 .extra2         = &maxolduid,
3249         },
3250         {
3251                 .procname       = "overflowgid",
3252                 .data           = &fs_overflowgid,
3253                 .maxlen         = sizeof(int),
3254                 .mode           = 0644,
3255                 .proc_handler   = proc_dointvec_minmax,
3256                 .extra1         = &minolduid,
3257                 .extra2         = &maxolduid,
3258         },
3259 #ifdef CONFIG_FILE_LOCKING
3260         {
3261                 .procname       = "leases-enable",
3262                 .data           = &leases_enable,
3263                 .maxlen         = sizeof(int),
3264                 .mode           = 0644,
3265                 .proc_handler   = proc_dointvec,
3266         },
3267 #endif
3268 #ifdef CONFIG_DNOTIFY
3269         {
3270                 .procname       = "dir-notify-enable",
3271                 .data           = &dir_notify_enable,
3272                 .maxlen         = sizeof(int),
3273                 .mode           = 0644,
3274                 .proc_handler   = proc_dointvec,
3275         },
3276 #endif
3277 #ifdef CONFIG_MMU
3278 #ifdef CONFIG_FILE_LOCKING
3279         {
3280                 .procname       = "lease-break-time",
3281                 .data           = &lease_break_time,
3282                 .maxlen         = sizeof(int),
3283                 .mode           = 0644,
3284                 .proc_handler   = proc_dointvec,
3285         },
3286 #endif
3287 #ifdef CONFIG_AIO
3288         {
3289                 .procname       = "aio-nr",
3290                 .data           = &aio_nr,
3291                 .maxlen         = sizeof(aio_nr),
3292                 .mode           = 0444,
3293                 .proc_handler   = proc_doulongvec_minmax,
3294         },
3295         {
3296                 .procname       = "aio-max-nr",
3297                 .data           = &aio_max_nr,
3298                 .maxlen         = sizeof(aio_max_nr),
3299                 .mode           = 0644,
3300                 .proc_handler   = proc_doulongvec_minmax,
3301         },
3302 #endif /* CONFIG_AIO */
3303 #ifdef CONFIG_INOTIFY_USER
3304         {
3305                 .procname       = "inotify",
3306                 .mode           = 0555,
3307                 .child          = inotify_table,
3308         },
3309 #endif
3310 #ifdef CONFIG_FANOTIFY
3311         {
3312                 .procname       = "fanotify",
3313                 .mode           = 0555,
3314                 .child          = fanotify_table,
3315         },
3316 #endif
3317 #ifdef CONFIG_EPOLL
3318         {
3319                 .procname       = "epoll",
3320                 .mode           = 0555,
3321                 .child          = epoll_table,
3322         },
3323 #endif
3324 #endif
3325         {
3326                 .procname       = "protected_symlinks",
3327                 .data           = &sysctl_protected_symlinks,
3328                 .maxlen         = sizeof(int),
3329                 .mode           = 0600,
3330                 .proc_handler   = proc_dointvec_minmax,
3331                 .extra1         = SYSCTL_ZERO,
3332                 .extra2         = SYSCTL_ONE,
3333         },
3334         {
3335                 .procname       = "protected_hardlinks",
3336                 .data           = &sysctl_protected_hardlinks,
3337                 .maxlen         = sizeof(int),
3338                 .mode           = 0600,
3339                 .proc_handler   = proc_dointvec_minmax,
3340                 .extra1         = SYSCTL_ZERO,
3341                 .extra2         = SYSCTL_ONE,
3342         },
3343         {
3344                 .procname       = "protected_fifos",
3345                 .data           = &sysctl_protected_fifos,
3346                 .maxlen         = sizeof(int),
3347                 .mode           = 0600,
3348                 .proc_handler   = proc_dointvec_minmax,
3349                 .extra1         = SYSCTL_ZERO,
3350                 .extra2         = &two,
3351         },
3352         {
3353                 .procname       = "protected_regular",
3354                 .data           = &sysctl_protected_regular,
3355                 .maxlen         = sizeof(int),
3356                 .mode           = 0600,
3357                 .proc_handler   = proc_dointvec_minmax,
3358                 .extra1         = SYSCTL_ZERO,
3359                 .extra2         = &two,
3360         },
3361         {
3362                 .procname       = "suid_dumpable",
3363                 .data           = &suid_dumpable,
3364                 .maxlen         = sizeof(int),
3365                 .mode           = 0644,
3366                 .proc_handler   = proc_dointvec_minmax_coredump,
3367                 .extra1         = SYSCTL_ZERO,
3368                 .extra2         = &two,
3369         },
3370 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
3371         {
3372                 .procname       = "binfmt_misc",
3373                 .mode           = 0555,
3374                 .child          = sysctl_mount_point,
3375         },
3376 #endif
3377         {
3378                 .procname       = "pipe-max-size",
3379                 .data           = &pipe_max_size,
3380                 .maxlen         = sizeof(pipe_max_size),
3381                 .mode           = 0644,
3382                 .proc_handler   = proc_dopipe_max_size,
3383         },
3384         {
3385                 .procname       = "pipe-user-pages-hard",
3386                 .data           = &pipe_user_pages_hard,
3387                 .maxlen         = sizeof(pipe_user_pages_hard),
3388                 .mode           = 0644,
3389                 .proc_handler   = proc_doulongvec_minmax,
3390         },
3391         {
3392                 .procname       = "pipe-user-pages-soft",
3393                 .data           = &pipe_user_pages_soft,
3394                 .maxlen         = sizeof(pipe_user_pages_soft),
3395                 .mode           = 0644,
3396                 .proc_handler   = proc_doulongvec_minmax,
3397         },
3398         {
3399                 .procname       = "mount-max",
3400                 .data           = &sysctl_mount_max,
3401                 .maxlen         = sizeof(unsigned int),
3402                 .mode           = 0644,
3403                 .proc_handler   = proc_dointvec_minmax,
3404                 .extra1         = SYSCTL_ONE,
3405         },
3406         { }
3407 };
3408
3409 static struct ctl_table debug_table[] = {
3410 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
3411         {
3412                 .procname       = "exception-trace",
3413                 .data           = &show_unhandled_signals,
3414                 .maxlen         = sizeof(int),
3415                 .mode           = 0644,
3416                 .proc_handler   = proc_dointvec
3417         },
3418 #endif
3419 #if defined(CONFIG_OPTPROBES)
3420         {
3421                 .procname       = "kprobes-optimization",
3422                 .data           = &sysctl_kprobes_optimization,
3423                 .maxlen         = sizeof(int),
3424                 .mode           = 0644,
3425                 .proc_handler   = proc_kprobes_optimization_handler,
3426                 .extra1         = SYSCTL_ZERO,
3427                 .extra2         = SYSCTL_ONE,
3428         },
3429 #endif
3430         { }
3431 };
3432
3433 static struct ctl_table dev_table[] = {
3434         { }
3435 };
3436
3437 static struct ctl_table sysctl_base_table[] = {
3438         {
3439                 .procname       = "kernel",
3440                 .mode           = 0555,
3441                 .child          = kern_table,
3442         },
3443         {
3444                 .procname       = "vm",
3445                 .mode           = 0555,
3446                 .child          = vm_table,
3447         },
3448         {
3449                 .procname       = "fs",
3450                 .mode           = 0555,
3451                 .child          = fs_table,
3452         },
3453         {
3454                 .procname       = "debug",
3455                 .mode           = 0555,
3456                 .child          = debug_table,
3457         },
3458         {
3459                 .procname       = "dev",
3460                 .mode           = 0555,
3461                 .child          = dev_table,
3462         },
3463         { }
3464 };
3465
3466 int __init sysctl_init(void)
3467 {
3468         struct ctl_table_header *hdr;
3469
3470         hdr = register_sysctl_table(sysctl_base_table);
3471         kmemleak_not_leak(hdr);
3472         return 0;
3473 }
3474 #endif /* CONFIG_SYSCTL */
/*
 * All proc handler exports are collected here, after the end of the
 * CONFIG_SYSCTL block, instead of following each definition: no sense
 * putting this after each symbol definition, twice, exception granted :-)
 *
 * Note that proc_douintvec_minmax is the only GPL-only export.
 */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);