Merge tag 'riscv-for-linus-6.3-rc2' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-starfive.git] / kernel / sysctl.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21
22 #include <linux/module.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/panic.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/filter.h>
36 #include <linux/fs.h>
37 #include <linux/init.h>
38 #include <linux/kernel.h>
39 #include <linux/kobject.h>
40 #include <linux/net.h>
41 #include <linux/sysrq.h>
42 #include <linux/highuid.h>
43 #include <linux/writeback.h>
44 #include <linux/ratelimit.h>
45 #include <linux/compaction.h>
46 #include <linux/hugetlb.h>
47 #include <linux/initrd.h>
48 #include <linux/key.h>
49 #include <linux/times.h>
50 #include <linux/limits.h>
51 #include <linux/dcache.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/oom.h>
60 #include <linux/kmod.h>
61 #include <linux/capability.h>
62 #include <linux/binfmts.h>
63 #include <linux/sched/sysctl.h>
64 #include <linux/mount.h>
65 #include <linux/userfaultfd_k.h>
66 #include <linux/pid.h>
67
68 #include "../lib/kstrtox.h"
69
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_RT_MUTEXES
82 #include <linux/rtmutex.h>
83 #endif
84
/*
 * Shared constants to be used in various sysctls.  Both tables are
 * exported; the long variant is exported GPL-only.
 *
 * NOTE(review): users presumably refer to entries by position, so the
 * element order should be treated as fixed - confirm against the header
 * that declares these arrays before reordering.
 */
const int sysctl_vals[] = {
	0, 1, 2, 3, 4,
	100, 200, 1000, 3000,
	INT_MAX, 65535, -1,
};
EXPORT_SYMBOL(sysctl_vals);

const unsigned long sysctl_long_vals[] = {
	0,
	1,
	LONG_MAX,
};
EXPORT_SYMBOL_GPL(sysctl_long_vals);
91
92 #if defined(CONFIG_SYSCTL)
93
94 /* Constants used for minimum and maximum */
95
96 #ifdef CONFIG_PERF_EVENTS
97 static const int six_hundred_forty_kb = 640 * 1024;
98 #endif
99
100
101 static const int ngroups_max = NGROUPS_MAX;
102 static const int cap_last_cap = CAP_LAST_CAP;
103
104 #ifdef CONFIG_PROC_SYSCTL
105
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl
 *	value to be written, and multiple writes on the same sysctl file
 *	descriptor will rewrite the sysctl value, regardless of file position.
 *	No warning is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above, but warn when the initial file
 *	position is not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *	file position 0 and the value must be fully contained in the buffer
 *	sent to the write syscall. For strings the file position is respected,
 *	but only up to the max length of the buffer; anything past the max
 *	length will be ignored. Multiple writes will append to the buffer.
 *
 * These write modes control how the current file position affects the
 * behavior of updating sysctl values through the proc interface on each
 * write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};

/* Active write mode; strict is the built-in default. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
132 #endif /* CONFIG_PROC_SYSCTL */
133
/*
 * Only defined when the architecture provides its own mmap layout picker
 * or asks for the default top-down layout.
 * NOTE(review): judging by the name this selects the legacy VA layout;
 * its consumers are not visible here - confirm.
 */
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
int sysctl_legacy_va_layout;
#endif
138
139 #endif /* CONFIG_SYSCTL */
140
141 /*
142  * /proc/sys support
143  */
144
145 #ifdef CONFIG_PROC_SYSCTL
146
147 static int _proc_do_string(char *data, int maxlen, int write,
148                 char *buffer, size_t *lenp, loff_t *ppos)
149 {
150         size_t len;
151         char c, *p;
152
153         if (!data || !maxlen || !*lenp) {
154                 *lenp = 0;
155                 return 0;
156         }
157
158         if (write) {
159                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
160                         /* Only continue writes not past the end of buffer. */
161                         len = strlen(data);
162                         if (len > maxlen - 1)
163                                 len = maxlen - 1;
164
165                         if (*ppos > len)
166                                 return 0;
167                         len = *ppos;
168                 } else {
169                         /* Start writing from beginning of buffer. */
170                         len = 0;
171                 }
172
173                 *ppos += *lenp;
174                 p = buffer;
175                 while ((p - buffer) < *lenp && len < maxlen - 1) {
176                         c = *(p++);
177                         if (c == 0 || c == '\n')
178                                 break;
179                         data[len++] = c;
180                 }
181                 data[len] = 0;
182         } else {
183                 len = strlen(data);
184                 if (len > maxlen)
185                         len = maxlen;
186
187                 if (*ppos > len) {
188                         *lenp = 0;
189                         return 0;
190                 }
191
192                 data += *ppos;
193                 len  -= *ppos;
194
195                 if (len > *lenp)
196                         len = *lenp;
197                 if (len)
198                         memcpy(buffer, data, len);
199                 if (len < *lenp) {
200                         buffer[len] = '\n';
201                         len++;
202                 }
203                 *lenp = len;
204                 *ppos += len;
205         }
206         return 0;
207 }
208
209 static void warn_sysctl_write(struct ctl_table *table)
210 {
211         pr_warn_once("%s wrote to %s when file position was not 0!\n"
212                 "This will not be supported in the future. To silence this\n"
213                 "warning, set kernel.sysctl_writes_strict = -1\n",
214                 current->comm, table->procname);
215 }
216
217 /**
218  * proc_first_pos_non_zero_ignore - check if first position is allowed
219  * @ppos: file position
220  * @table: the sysctl table
221  *
222  * Returns true if the first position is non-zero and the sysctl_writes_strict
223  * mode indicates this is not allowed for numeric input types. String proc
224  * handlers can ignore the return value.
225  */
226 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
227                                            struct ctl_table *table)
228 {
229         if (!*ppos)
230                 return false;
231
232         switch (sysctl_writes_strict) {
233         case SYSCTL_WRITES_STRICT:
234                 return true;
235         case SYSCTL_WRITES_WARN:
236                 warn_sysctl_write(table);
237                 return false;
238         default:
239                 return false;
240         }
241 }
242
243 /**
244  * proc_dostring - read a string sysctl
245  * @table: the sysctl table
246  * @write: %TRUE if this is a write to the sysctl file
247  * @buffer: the user buffer
248  * @lenp: the size of the user buffer
249  * @ppos: file position
250  *
251  * Reads/writes a string from/to the user buffer. If the kernel
252  * buffer provided is not large enough to hold the string, the
253  * string is truncated. The copied string is %NULL-terminated.
254  * If the string is being read by the user process, it is copied
255  * and a newline '\n' is added. It is truncated if the buffer is
256  * not large enough.
257  *
258  * Returns 0 on success.
259  */
260 int proc_dostring(struct ctl_table *table, int write,
261                   void *buffer, size_t *lenp, loff_t *ppos)
262 {
263         if (write)
264                 proc_first_pos_non_zero_ignore(ppos, table);
265
266         return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
267                         ppos);
268 }
269
/*
 * Advance *buf past leading whitespace, looking at no more than *size
 * bytes; *size is reduced by the number of bytes skipped.
 */
static void proc_skip_spaces(char **buf, size_t *size)
{
	while (*size && isspace(**buf)) {
		--*size;
		++*buf;
	}
}
279
/*
 * Advance *buf past a leading run of the character @v, looking at no more
 * than *size bytes; *size is reduced by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		--*size;
		++*buf;
	}
}
289
290 /**
291  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
292  *                   fail on overflow
293  *
294  * @cp: kernel buffer containing the string to parse
295  * @endp: pointer to store the trailing characters
296  * @base: the base to use
297  * @res: where the parsed integer will be stored
298  *
299  * In case of success 0 is returned and @res will contain the parsed integer,
300  * @endp will hold any trailing characters.
301  * This function will fail the parse on overflow. If there wasn't an overflow
302  * the function will defer the decision what characters count as invalid to the
303  * caller.
304  */
305 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
306                            unsigned long *res)
307 {
308         unsigned long long result;
309         unsigned int rv;
310
311         cp = _parse_integer_fixup_radix(cp, &base);
312         rv = _parse_integer(cp, base, &result);
313         if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
314                 return -ERANGE;
315
316         cp += rv;
317
318         if (endp)
319                 *endp = (char *)cp;
320
321         *res = (unsigned long)result;
322         return 0;
323 }
324
325 #define TMPBUFLEN 22
326 /**
327  * proc_get_long - reads an ASCII formatted integer from a user buffer
328  *
329  * @buf: a kernel buffer
330  * @size: size of the kernel buffer
331  * @val: this is where the number will be stored
332  * @neg: set to %TRUE if number is negative
333  * @perm_tr: a vector which contains the allowed trailers
334  * @perm_tr_len: size of the perm_tr vector
335  * @tr: pointer to store the trailer character
336  *
337  * In case of success %0 is returned and @buf and @size are updated with
338  * the amount of bytes read. If @tr is non-NULL and a trailing
339  * character exists (size is non-zero after returning from this
340  * function), @tr is updated with the trailing character.
341  */
342 static int proc_get_long(char **buf, size_t *size,
343                           unsigned long *val, bool *neg,
344                           const char *perm_tr, unsigned perm_tr_len, char *tr)
345 {
346         char *p, tmp[TMPBUFLEN];
347         ssize_t len = *size;
348
349         if (len <= 0)
350                 return -EINVAL;
351
352         if (len > TMPBUFLEN - 1)
353                 len = TMPBUFLEN - 1;
354
355         memcpy(tmp, *buf, len);
356
357         tmp[len] = 0;
358         p = tmp;
359         if (*p == '-' && *size > 1) {
360                 *neg = true;
361                 p++;
362         } else
363                 *neg = false;
364         if (!isdigit(*p))
365                 return -EINVAL;
366
367         if (strtoul_lenient(p, &p, 0, val))
368                 return -EINVAL;
369
370         len = p - tmp;
371
372         /* We don't know if the next char is whitespace thus we may accept
373          * invalid integers (e.g. 1234...a) or two integers instead of one
374          * (e.g. 123...1). So lets not allow such large numbers. */
375         if (len == TMPBUFLEN - 1)
376                 return -EINVAL;
377
378         if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
379                 return -EINVAL;
380
381         if (tr && (len < *size))
382                 *tr = *p;
383
384         *buf += len;
385         *size -= len;
386
387         return 0;
388 }
389
390 /**
391  * proc_put_long - converts an integer to a decimal ASCII formatted string
392  *
393  * @buf: the user buffer
394  * @size: the size of the user buffer
395  * @val: the integer to be converted
396  * @neg: sign of the number, %TRUE for negative
397  *
398  * In case of success @buf and @size are updated with the amount of bytes
399  * written.
400  */
401 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
402 {
403         int len;
404         char tmp[TMPBUFLEN], *p = tmp;
405
406         sprintf(p, "%s%lu", neg ? "-" : "", val);
407         len = strlen(tmp);
408         if (len > *size)
409                 len = *size;
410         memcpy(*buf, tmp, len);
411         *size -= len;
412         *buf += len;
413 }
414 #undef TMPBUFLEN
415
/*
 * Store the single character @c at *buf and step *buf/*size past it.
 * Does nothing when no space is left.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;

	--*size;
	*buf = dst + 1;
}
427
/*
 * Default conversion between an int at @valp and the sign/magnitude pair
 * (*@negp, *@lvalp) used by the parser and formatter.
 *
 * Write: stores the signed value into *@valp, or returns -EINVAL if the
 * magnitude does not fit in an int.  Read: splits *@valp into sign and
 * magnitude.  @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int cur = READ_ONCE(*valp);
		bool negative = cur < 0;

		*negp = negative;
		/* Negate in unsigned arithmetic so INT_MIN is handled. */
		*lvalp = negative ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	if (*negp) {
		/* The negative range reaches one further than the positive. */
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		WRITE_ONCE(*valp, -*lvalp);
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
454
/*
 * Default conversion between an unsigned int at @valp and the unsigned
 * long *@lvalp used by the parser and formatter.
 *
 * Write: stores *@lvalp into *@valp, or returns -EINVAL if it exceeds
 * UINT_MAX.  Read: widens *@valp into *@lvalp.  @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		unsigned int cur = READ_ONCE(*valp);

		*lvalp = (unsigned long)cur;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
469
470 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
471
472 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
473                   int write, void *buffer,
474                   size_t *lenp, loff_t *ppos,
475                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
476                               int write, void *data),
477                   void *data)
478 {
479         int *i, vleft, first = 1, err = 0;
480         size_t left;
481         char *p;
482
483         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
484                 *lenp = 0;
485                 return 0;
486         }
487
488         i = (int *) tbl_data;
489         vleft = table->maxlen / sizeof(*i);
490         left = *lenp;
491
492         if (!conv)
493                 conv = do_proc_dointvec_conv;
494
495         if (write) {
496                 if (proc_first_pos_non_zero_ignore(ppos, table))
497                         goto out;
498
499                 if (left > PAGE_SIZE - 1)
500                         left = PAGE_SIZE - 1;
501                 p = buffer;
502         }
503
504         for (; left && vleft--; i++, first=0) {
505                 unsigned long lval;
506                 bool neg;
507
508                 if (write) {
509                         proc_skip_spaces(&p, &left);
510
511                         if (!left)
512                                 break;
513                         err = proc_get_long(&p, &left, &lval, &neg,
514                                              proc_wspace_sep,
515                                              sizeof(proc_wspace_sep), NULL);
516                         if (err)
517                                 break;
518                         if (conv(&neg, &lval, i, 1, data)) {
519                                 err = -EINVAL;
520                                 break;
521                         }
522                 } else {
523                         if (conv(&neg, &lval, i, 0, data)) {
524                                 err = -EINVAL;
525                                 break;
526                         }
527                         if (!first)
528                                 proc_put_char(&buffer, &left, '\t');
529                         proc_put_long(&buffer, &left, lval, neg);
530                 }
531         }
532
533         if (!write && !first && left && !err)
534                 proc_put_char(&buffer, &left, '\n');
535         if (write && !err && left)
536                 proc_skip_spaces(&p, &left);
537         if (write && first)
538                 return err ? : -EINVAL;
539         *lenp -= left;
540 out:
541         *ppos += *lenp;
542         return err;
543 }
544
545 static int do_proc_dointvec(struct ctl_table *table, int write,
546                   void *buffer, size_t *lenp, loff_t *ppos,
547                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
548                               int write, void *data),
549                   void *data)
550 {
551         return __do_proc_dointvec(table->data, table, write,
552                         buffer, lenp, ppos, conv, data);
553 }
554
555 static int do_proc_douintvec_w(unsigned int *tbl_data,
556                                struct ctl_table *table,
557                                void *buffer,
558                                size_t *lenp, loff_t *ppos,
559                                int (*conv)(unsigned long *lvalp,
560                                            unsigned int *valp,
561                                            int write, void *data),
562                                void *data)
563 {
564         unsigned long lval;
565         int err = 0;
566         size_t left;
567         bool neg;
568         char *p = buffer;
569
570         left = *lenp;
571
572         if (proc_first_pos_non_zero_ignore(ppos, table))
573                 goto bail_early;
574
575         if (left > PAGE_SIZE - 1)
576                 left = PAGE_SIZE - 1;
577
578         proc_skip_spaces(&p, &left);
579         if (!left) {
580                 err = -EINVAL;
581                 goto out_free;
582         }
583
584         err = proc_get_long(&p, &left, &lval, &neg,
585                              proc_wspace_sep,
586                              sizeof(proc_wspace_sep), NULL);
587         if (err || neg) {
588                 err = -EINVAL;
589                 goto out_free;
590         }
591
592         if (conv(&lval, tbl_data, 1, data)) {
593                 err = -EINVAL;
594                 goto out_free;
595         }
596
597         if (!err && left)
598                 proc_skip_spaces(&p, &left);
599
600 out_free:
601         if (err)
602                 return -EINVAL;
603
604         return 0;
605
606         /* This is in keeping with old __do_proc_dointvec() */
607 bail_early:
608         *ppos += *lenp;
609         return err;
610 }
611
612 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
613                                size_t *lenp, loff_t *ppos,
614                                int (*conv)(unsigned long *lvalp,
615                                            unsigned int *valp,
616                                            int write, void *data),
617                                void *data)
618 {
619         unsigned long lval;
620         int err = 0;
621         size_t left;
622
623         left = *lenp;
624
625         if (conv(&lval, tbl_data, 0, data)) {
626                 err = -EINVAL;
627                 goto out;
628         }
629
630         proc_put_long(&buffer, &left, lval, false);
631         if (!left)
632                 goto out;
633
634         proc_put_char(&buffer, &left, '\n');
635
636 out:
637         *lenp -= left;
638         *ppos += *lenp;
639
640         return err;
641 }
642
643 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
644                                int write, void *buffer,
645                                size_t *lenp, loff_t *ppos,
646                                int (*conv)(unsigned long *lvalp,
647                                            unsigned int *valp,
648                                            int write, void *data),
649                                void *data)
650 {
651         unsigned int *i, vleft;
652
653         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
654                 *lenp = 0;
655                 return 0;
656         }
657
658         i = (unsigned int *) tbl_data;
659         vleft = table->maxlen / sizeof(*i);
660
661         /*
662          * Arrays are not supported, keep this simple. *Do not* add
663          * support for them.
664          */
665         if (vleft != 1) {
666                 *lenp = 0;
667                 return -EINVAL;
668         }
669
670         if (!conv)
671                 conv = do_proc_douintvec_conv;
672
673         if (write)
674                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
675                                            conv, data);
676         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
677 }
678
679 int do_proc_douintvec(struct ctl_table *table, int write,
680                       void *buffer, size_t *lenp, loff_t *ppos,
681                       int (*conv)(unsigned long *lvalp,
682                                   unsigned int *valp,
683                                   int write, void *data),
684                       void *data)
685 {
686         return __do_proc_douintvec(table->data, table, write,
687                                    buffer, lenp, ppos, conv, data);
688 }
689
690 /**
691  * proc_dobool - read/write a bool
692  * @table: the sysctl table
693  * @write: %TRUE if this is a write to the sysctl file
694  * @buffer: the user buffer
695  * @lenp: the size of the user buffer
696  * @ppos: file position
697  *
698  * Reads/writes one integer value from/to the user buffer,
699  * treated as an ASCII string.
700  *
701  * table->data must point to a bool variable and table->maxlen must
702  * be sizeof(bool).
703  *
704  * Returns 0 on success.
705  */
706 int proc_dobool(struct ctl_table *table, int write, void *buffer,
707                 size_t *lenp, loff_t *ppos)
708 {
709         struct ctl_table tmp;
710         bool *data = table->data;
711         int res, val;
712
713         /* Do not support arrays yet. */
714         if (table->maxlen != sizeof(bool))
715                 return -EINVAL;
716
717         tmp = *table;
718         tmp.maxlen = sizeof(val);
719         tmp.data = &val;
720
721         val = READ_ONCE(*data);
722         res = proc_dointvec(&tmp, write, buffer, lenp, ppos);
723         if (res)
724                 return res;
725         if (write)
726                 WRITE_ONCE(*data, val);
727         return 0;
728 }
729
730 /**
731  * proc_dointvec - read a vector of integers
732  * @table: the sysctl table
733  * @write: %TRUE if this is a write to the sysctl file
734  * @buffer: the user buffer
735  * @lenp: the size of the user buffer
736  * @ppos: file position
737  *
738  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
739  * values from/to the user buffer, treated as an ASCII string.
740  *
741  * Returns 0 on success.
742  */
743 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
744                   size_t *lenp, loff_t *ppos)
745 {
746         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
747 }
748
#ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() with an extra one-time warning: when
 * CONFIG_PREEMPT_RT is enabled and a successful write changed the int at
 * table->data, log the sysctl name together with the writing task's comm
 * and pid.  Reads, and all accesses without CONFIG_PREEMPT_RT, are passed
 * straight through.
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	const bool watch = IS_ENABLED(CONFIG_PREEMPT_RT) && write;
	int before = 0;
	int ret;

	if (watch)
		before = *(int *)table->data;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (watch && !ret && before != *(int *)table->data)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));

	return ret;
}
#endif
769
770 /**
771  * proc_douintvec - read a vector of unsigned integers
772  * @table: the sysctl table
773  * @write: %TRUE if this is a write to the sysctl file
774  * @buffer: the user buffer
775  * @lenp: the size of the user buffer
776  * @ppos: file position
777  *
778  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
779  * values from/to the user buffer, treated as an ASCII string.
780  *
781  * Returns 0 on success.
782  */
783 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
784                 size_t *lenp, loff_t *ppos)
785 {
786         return do_proc_douintvec(table, write, buffer, lenp, ppos,
787                                  do_proc_douintvec_conv, NULL);
788 }
789
790 /*
791  * Taint values can only be increased
792  * This means we can safely use a temporary.
793  */
794 static int proc_taint(struct ctl_table *table, int write,
795                                void *buffer, size_t *lenp, loff_t *ppos)
796 {
797         struct ctl_table t;
798         unsigned long tmptaint = get_taint();
799         int err;
800
801         if (write && !capable(CAP_SYS_ADMIN))
802                 return -EPERM;
803
804         t = *table;
805         t.data = &tmptaint;
806         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
807         if (err < 0)
808                 return err;
809
810         if (write) {
811                 int i;
812
813                 /*
814                  * If we are relying on panic_on_taint not producing
815                  * false positives due to userspace input, bail out
816                  * before setting the requested taint flags.
817                  */
818                 if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
819                         return -EINVAL;
820
821                 /*
822                  * Poor man's atomic or. Not worth adding a primitive
823                  * to everyone's atomic.h for this
824                  */
825                 for (i = 0; i < TAINT_FLAGS_COUNT; i++)
826                         if ((1UL << i) & tmptaint)
827                                 add_taint(i, LOCKDEP_STILL_OK);
828         }
829
830         return err;
831 }
832
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler. A NULL pointer
 * disables the corresponding bound.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback with range checking.  @data must point to a
 * struct do_proc_dointvec_minmax_conv_param.
 *
 * Reads behave exactly like do_proc_dointvec_conv().  Writes are first
 * converted into a local int; *@valp is only updated if that value lies
 * within the configured bounds, otherwise -EINVAL is returned.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *bounds = data;
	int candidate, ret;

	if (!write)
		return do_proc_dointvec_conv(negp, lvalp, valp, write, data);

	/* Convert into a temporary so *valp is untouched on failure. */
	ret = do_proc_dointvec_conv(negp, lvalp, &candidate, write, data);
	if (ret)
		return ret;

	if (bounds->min && *bounds->min > candidate)
		return -EINVAL;
	if (bounds->max && *bounds->max < candidate)
		return -EINVAL;

	WRITE_ONCE(*valp, candidate);
	return 0;
}
872
873 /**
874  * proc_dointvec_minmax - read a vector of integers with min/max values
875  * @table: the sysctl table
876  * @write: %TRUE if this is a write to the sysctl file
877  * @buffer: the user buffer
878  * @lenp: the size of the user buffer
879  * @ppos: file position
880  *
881  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
882  * values from/to the user buffer, treated as an ASCII string.
883  *
884  * This routine will ensure the values are within the range specified by
885  * table->extra1 (min) and table->extra2 (max).
886  *
887  * Returns 0 on success or -EINVAL on write when the range check fails.
888  */
889 int proc_dointvec_minmax(struct ctl_table *table, int write,
890                   void *buffer, size_t *lenp, loff_t *ppos)
891 {
892         struct do_proc_dointvec_minmax_conv_param param = {
893                 .min = (int *) table->extra1,
894                 .max = (int *) table->extra2,
895         };
896         return do_proc_dointvec(table, write, buffer, lenp, ppos,
897                                 do_proc_dointvec_minmax_conv, &param);
898 }
899
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler. A NULL pointer
 * disables the corresponding bound.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Conversion callback with range checking.  @data must point to a
 * struct do_proc_douintvec_minmax_conv_param.
 *
 * Reads behave exactly like do_proc_douintvec_conv().  Writes are first
 * converted into a local unsigned int; *@valp is only updated if that
 * value lies within the configured bounds, otherwise -ERANGE is returned.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *bounds = data;
	unsigned int candidate;
	int ret;

	if (!write)
		return do_proc_douintvec_conv(lvalp, valp, write, data);

	/* write via temporary local uint for bounds-checking */
	ret = do_proc_douintvec_conv(lvalp, &candidate, write, data);
	if (ret)
		return ret;

	if (bounds->min && *bounds->min > candidate)
		return -ERANGE;
	if (bounds->max && *bounds->max < candidate)
		return -ERANGE;

	WRITE_ONCE(*valp, candidate);
	return 0;
}
938
939 /**
940  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
941  * @table: the sysctl table
942  * @write: %TRUE if this is a write to the sysctl file
943  * @buffer: the user buffer
944  * @lenp: the size of the user buffer
945  * @ppos: file position
946  *
947  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
948  * values from/to the user buffer, treated as an ASCII string. Negative
949  * strings are not allowed.
950  *
951  * This routine will ensure the values are within the range specified by
952  * table->extra1 (min) and table->extra2 (max). There is a final sanity
953  * check for UINT_MAX to avoid having to support wrap around uses from
954  * userspace.
955  *
956  * Returns 0 on success or -ERANGE on write when the range check fails.
957  */
958 int proc_douintvec_minmax(struct ctl_table *table, int write,
959                           void *buffer, size_t *lenp, loff_t *ppos)
960 {
961         struct do_proc_douintvec_minmax_conv_param param = {
962                 .min = (unsigned int *) table->extra1,
963                 .max = (unsigned int *) table->extra2,
964         };
965         return do_proc_douintvec(table, write, buffer, lenp, ppos,
966                                  do_proc_douintvec_minmax_conv, &param);
967 }
968
969 /**
970  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
971  * @table: the sysctl table
972  * @write: %TRUE if this is a write to the sysctl file
973  * @buffer: the user buffer
974  * @lenp: the size of the user buffer
975  * @ppos: file position
976  *
977  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
978  * values from/to the user buffer, treated as an ASCII string. Negative
979  * strings are not allowed.
980  *
981  * This routine will ensure the values are within the range specified by
982  * table->extra1 (min) and table->extra2 (max).
983  *
984  * Returns 0 on success or an error on write when the range check fails.
985  */
986 int proc_dou8vec_minmax(struct ctl_table *table, int write,
987                         void *buffer, size_t *lenp, loff_t *ppos)
988 {
989         struct ctl_table tmp;
990         unsigned int min = 0, max = 255U, val;
991         u8 *data = table->data;
992         struct do_proc_douintvec_minmax_conv_param param = {
993                 .min = &min,
994                 .max = &max,
995         };
996         int res;
997
998         /* Do not support arrays yet. */
999         if (table->maxlen != sizeof(u8))
1000                 return -EINVAL;
1001
1002         if (table->extra1) {
1003                 min = *(unsigned int *) table->extra1;
1004                 if (min > 255U)
1005                         return -EINVAL;
1006         }
1007         if (table->extra2) {
1008                 max = *(unsigned int *) table->extra2;
1009                 if (max > 255U)
1010                         return -EINVAL;
1011         }
1012
1013         tmp = *table;
1014
1015         tmp.maxlen = sizeof(val);
1016         tmp.data = &val;
1017         val = READ_ONCE(*data);
1018         res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1019                                 do_proc_douintvec_minmax_conv, &param);
1020         if (res)
1021                 return res;
1022         if (write)
1023                 WRITE_ONCE(*data, val);
1024         return 0;
1025 }
1026 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1027
1028 #ifdef CONFIG_MAGIC_SYSRQ
1029 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
1030                                 void *buffer, size_t *lenp, loff_t *ppos)
1031 {
1032         int tmp, ret;
1033
1034         tmp = sysrq_mask();
1035
1036         ret = __do_proc_dointvec(&tmp, table, write, buffer,
1037                                lenp, ppos, NULL, NULL);
1038         if (ret || !write)
1039                 return ret;
1040
1041         if (write)
1042                 sysrq_toggle_support(tmp);
1043
1044         return 0;
1045 }
1046 #endif
1047
1048 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1049                 int write, void *buffer, size_t *lenp, loff_t *ppos,
1050                 unsigned long convmul, unsigned long convdiv)
1051 {
1052         unsigned long *i, *min, *max;
1053         int vleft, first = 1, err = 0;
1054         size_t left;
1055         char *p;
1056
1057         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1058                 *lenp = 0;
1059                 return 0;
1060         }
1061
1062         i = data;
1063         min = table->extra1;
1064         max = table->extra2;
1065         vleft = table->maxlen / sizeof(unsigned long);
1066         left = *lenp;
1067
1068         if (write) {
1069                 if (proc_first_pos_non_zero_ignore(ppos, table))
1070                         goto out;
1071
1072                 if (left > PAGE_SIZE - 1)
1073                         left = PAGE_SIZE - 1;
1074                 p = buffer;
1075         }
1076
1077         for (; left && vleft--; i++, first = 0) {
1078                 unsigned long val;
1079
1080                 if (write) {
1081                         bool neg;
1082
1083                         proc_skip_spaces(&p, &left);
1084                         if (!left)
1085                                 break;
1086
1087                         err = proc_get_long(&p, &left, &val, &neg,
1088                                              proc_wspace_sep,
1089                                              sizeof(proc_wspace_sep), NULL);
1090                         if (err || neg) {
1091                                 err = -EINVAL;
1092                                 break;
1093                         }
1094
1095                         val = convmul * val / convdiv;
1096                         if ((min && val < *min) || (max && val > *max)) {
1097                                 err = -EINVAL;
1098                                 break;
1099                         }
1100                         WRITE_ONCE(*i, val);
1101                 } else {
1102                         val = convdiv * READ_ONCE(*i) / convmul;
1103                         if (!first)
1104                                 proc_put_char(&buffer, &left, '\t');
1105                         proc_put_long(&buffer, &left, val, false);
1106                 }
1107         }
1108
1109         if (!write && !first && left && !err)
1110                 proc_put_char(&buffer, &left, '\n');
1111         if (write && !err)
1112                 proc_skip_spaces(&p, &left);
1113         if (write && first)
1114                 return err ? : -EINVAL;
1115         *lenp -= left;
1116 out:
1117         *ppos += *lenp;
1118         return err;
1119 }
1120
1121 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1122                 void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1123                 unsigned long convdiv)
1124 {
1125         return __do_proc_doulongvec_minmax(table->data, table, write,
1126                         buffer, lenp, ppos, convmul, convdiv);
1127 }
1128
1129 /**
1130  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1131  * @table: the sysctl table
1132  * @write: %TRUE if this is a write to the sysctl file
1133  * @buffer: the user buffer
1134  * @lenp: the size of the user buffer
1135  * @ppos: file position
1136  *
1137  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1138  * values from/to the user buffer, treated as an ASCII string.
1139  *
1140  * This routine will ensure the values are within the range specified by
1141  * table->extra1 (min) and table->extra2 (max).
1142  *
1143  * Returns 0 on success.
1144  */
1145 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1146                            void *buffer, size_t *lenp, loff_t *ppos)
1147 {
1148     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1149 }
1150
1151 /**
1152  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1153  * @table: the sysctl table
1154  * @write: %TRUE if this is a write to the sysctl file
1155  * @buffer: the user buffer
1156  * @lenp: the size of the user buffer
1157  * @ppos: file position
1158  *
1159  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1160  * values from/to the user buffer, treated as an ASCII string. The values
1161  * are treated as milliseconds, and converted to jiffies when they are stored.
1162  *
1163  * This routine will ensure the values are within the range specified by
1164  * table->extra1 (min) and table->extra2 (max).
1165  *
1166  * Returns 0 on success.
1167  */
1168 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1169                                       void *buffer, size_t *lenp, loff_t *ppos)
1170 {
1171     return do_proc_doulongvec_minmax(table, write, buffer,
1172                                      lenp, ppos, HZ, 1000l);
1173 }
1174
1175
1176 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1177                                          int *valp,
1178                                          int write, void *data)
1179 {
1180         if (write) {
1181                 if (*lvalp > INT_MAX / HZ)
1182                         return 1;
1183                 if (*negp)
1184                         WRITE_ONCE(*valp, -*lvalp * HZ);
1185                 else
1186                         WRITE_ONCE(*valp, *lvalp * HZ);
1187         } else {
1188                 int val = READ_ONCE(*valp);
1189                 unsigned long lval;
1190                 if (val < 0) {
1191                         *negp = true;
1192                         lval = -(unsigned long)val;
1193                 } else {
1194                         *negp = false;
1195                         lval = (unsigned long)val;
1196                 }
1197                 *lvalp = lval / HZ;
1198         }
1199         return 0;
1200 }
1201
1202 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1203                                                 int *valp,
1204                                                 int write, void *data)
1205 {
1206         if (write) {
1207                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1208                         return 1;
1209                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1210         } else {
1211                 int val = *valp;
1212                 unsigned long lval;
1213                 if (val < 0) {
1214                         *negp = true;
1215                         lval = -(unsigned long)val;
1216                 } else {
1217                         *negp = false;
1218                         lval = (unsigned long)val;
1219                 }
1220                 *lvalp = jiffies_to_clock_t(lval);
1221         }
1222         return 0;
1223 }
1224
/*
 * Conversion callback for proc_dointvec_ms_jiffies(): the text side is in
 * milliseconds, the stored int is in jiffies.
 *
 * Write: the signed input is passed to msecs_to_jiffies(); returns 1 if
 * the result is above INT_MAX, otherwise stores it in *@valp.
 * Read: *@valp is split into sign and magnitude and converted with
 * jiffies_to_msecs().
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (jif > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)jif);
		return 0;
	}

	stored = READ_ONCE(*valp);
	*negp = stored < 0;
	/* Negate as unsigned long so INT_MIN does not overflow. */
	magnitude = stored < 0 ? -(unsigned long)stored : (unsigned long)stored;
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
1249
1250 static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1251                                                 int *valp, int write, void *data)
1252 {
1253         int tmp, ret;
1254         struct do_proc_dointvec_minmax_conv_param *param = data;
1255         /*
1256          * If writing, first do so via a temporary local int so we can
1257          * bounds-check it before touching *valp.
1258          */
1259         int *ip = write ? &tmp : valp;
1260
1261         ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1262         if (ret)
1263                 return ret;
1264
1265         if (write) {
1266                 if ((param->min && *param->min > tmp) ||
1267                                 (param->max && *param->max < tmp))
1268                         return -EINVAL;
1269                 *valp = tmp;
1270         }
1271         return 0;
1272 }
1273
1274 /**
1275  * proc_dointvec_jiffies - read a vector of integers as seconds
1276  * @table: the sysctl table
1277  * @write: %TRUE if this is a write to the sysctl file
1278  * @buffer: the user buffer
1279  * @lenp: the size of the user buffer
1280  * @ppos: file position
1281  *
1282  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1283  * values from/to the user buffer, treated as an ASCII string.
1284  * The values read are assumed to be in seconds, and are converted into
1285  * jiffies.
1286  *
1287  * Returns 0 on success.
1288  */
1289 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1290                           void *buffer, size_t *lenp, loff_t *ppos)
1291 {
1292     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1293                             do_proc_dointvec_jiffies_conv,NULL);
1294 }
1295
1296 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1297                           void *buffer, size_t *lenp, loff_t *ppos)
1298 {
1299         struct do_proc_dointvec_minmax_conv_param param = {
1300                 .min = (int *) table->extra1,
1301                 .max = (int *) table->extra2,
1302         };
1303         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1304                         do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1305 }
1306
1307 /**
1308  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1309  * @table: the sysctl table
1310  * @write: %TRUE if this is a write to the sysctl file
1311  * @buffer: the user buffer
1312  * @lenp: the size of the user buffer
1313  * @ppos: pointer to the file position
1314  *
1315  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1316  * values from/to the user buffer, treated as an ASCII string.
1317  * The values read are assumed to be in 1/USER_HZ seconds, and
1318  * are converted into jiffies.
1319  *
1320  * Returns 0 on success.
1321  */
1322 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1323                                  void *buffer, size_t *lenp, loff_t *ppos)
1324 {
1325         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1326                                 do_proc_dointvec_userhz_jiffies_conv, NULL);
1327 }
1328
1329 /**
1330  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1331  * @table: the sysctl table
1332  * @write: %TRUE if this is a write to the sysctl file
1333  * @buffer: the user buffer
1334  * @lenp: the size of the user buffer
1335  * @ppos: file position
1336  * @ppos: the current position in the file
1337  *
1338  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1339  * values from/to the user buffer, treated as an ASCII string.
1340  * The values read are assumed to be in 1/1000 seconds, and
1341  * are converted into jiffies.
1342  *
1343  * Returns 0 on success.
1344  */
1345 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1346                 size_t *lenp, loff_t *ppos)
1347 {
1348         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1349                                 do_proc_dointvec_ms_jiffies_conv, NULL);
1350 }
1351
1352 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1353                 size_t *lenp, loff_t *ppos)
1354 {
1355         struct pid *new_pid;
1356         pid_t tmp;
1357         int r;
1358
1359         tmp = pid_vnr(cad_pid);
1360
1361         r = __do_proc_dointvec(&tmp, table, write, buffer,
1362                                lenp, ppos, NULL, NULL);
1363         if (r || !write)
1364                 return r;
1365
1366         new_pid = find_get_pid(tmp);
1367         if (!new_pid)
1368                 return -ESRCH;
1369
1370         put_pid(xchg(&cad_pid, new_pid));
1371         return 0;
1372 }
1373
1374 /**
1375  * proc_do_large_bitmap - read/write from/to a large bitmap
1376  * @table: the sysctl table
1377  * @write: %TRUE if this is a write to the sysctl file
1378  * @buffer: the user buffer
1379  * @lenp: the size of the user buffer
1380  * @ppos: file position
1381  *
1382  * The bitmap is stored at table->data and the bitmap length (in bits)
1383  * in table->maxlen.
1384  *
1385  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1386  * large bitmaps may be represented in a compact manner. Writing into
1387  * the file will clear the bitmap then update it with the given input.
1388  *
1389  * Returns 0 on success.
1390  */
1391 int proc_do_large_bitmap(struct ctl_table *table, int write,
1392                          void *buffer, size_t *lenp, loff_t *ppos)
1393 {
1394         int err = 0;
1395         size_t left = *lenp;
1396         unsigned long bitmap_len = table->maxlen;
1397         unsigned long *bitmap = *(unsigned long **) table->data;
1398         unsigned long *tmp_bitmap = NULL;
1399         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1400
1401         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
1402                 *lenp = 0;
1403                 return 0;
1404         }
1405
1406         if (write) {
1407                 char *p = buffer;
1408                 size_t skipped = 0;
1409
1410                 if (left > PAGE_SIZE - 1) {
1411                         left = PAGE_SIZE - 1;
1412                         /* How much of the buffer we'll skip this pass */
1413                         skipped = *lenp - left;
1414                 }
1415
1416                 tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
1417                 if (!tmp_bitmap)
1418                         return -ENOMEM;
1419                 proc_skip_char(&p, &left, '\n');
1420                 while (!err && left) {
1421                         unsigned long val_a, val_b;
1422                         bool neg;
1423                         size_t saved_left;
1424
1425                         /* In case we stop parsing mid-number, we can reset */
1426                         saved_left = left;
1427                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
1428                                              sizeof(tr_a), &c);
1429                         /*
1430                          * If we consumed the entirety of a truncated buffer or
1431                          * only one char is left (may be a "-"), then stop here,
1432                          * reset, & come back for more.
1433                          */
1434                         if ((left <= 1) && skipped) {
1435                                 left = saved_left;
1436                                 break;
1437                         }
1438
1439                         if (err)
1440                                 break;
1441                         if (val_a >= bitmap_len || neg) {
1442                                 err = -EINVAL;
1443                                 break;
1444                         }
1445
1446                         val_b = val_a;
1447                         if (left) {
1448                                 p++;
1449                                 left--;
1450                         }
1451
1452                         if (c == '-') {
1453                                 err = proc_get_long(&p, &left, &val_b,
1454                                                      &neg, tr_b, sizeof(tr_b),
1455                                                      &c);
1456                                 /*
1457                                  * If we consumed all of a truncated buffer or
1458                                  * then stop here, reset, & come back for more.
1459                                  */
1460                                 if (!left && skipped) {
1461                                         left = saved_left;
1462                                         break;
1463                                 }
1464
1465                                 if (err)
1466                                         break;
1467                                 if (val_b >= bitmap_len || neg ||
1468                                     val_a > val_b) {
1469                                         err = -EINVAL;
1470                                         break;
1471                                 }
1472                                 if (left) {
1473                                         p++;
1474                                         left--;
1475                                 }
1476                         }
1477
1478                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
1479                         proc_skip_char(&p, &left, '\n');
1480                 }
1481                 left += skipped;
1482         } else {
1483                 unsigned long bit_a, bit_b = 0;
1484                 bool first = 1;
1485
1486                 while (left) {
1487                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1488                         if (bit_a >= bitmap_len)
1489                                 break;
1490                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
1491                                                    bit_a + 1) - 1;
1492
1493                         if (!first)
1494                                 proc_put_char(&buffer, &left, ',');
1495                         proc_put_long(&buffer, &left, bit_a, false);
1496                         if (bit_a != bit_b) {
1497                                 proc_put_char(&buffer, &left, '-');
1498                                 proc_put_long(&buffer, &left, bit_b, false);
1499                         }
1500
1501                         first = 0; bit_b++;
1502                 }
1503                 proc_put_char(&buffer, &left, '\n');
1504         }
1505
1506         if (!err) {
1507                 if (write) {
1508                         if (*ppos)
1509                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1510                         else
1511                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
1512                 }
1513                 *lenp -= left;
1514                 *ppos += *lenp;
1515         }
1516
1517         bitmap_free(tmp_bitmap);
1518         return err;
1519 }
1520
1521 #else /* CONFIG_PROC_SYSCTL */
1522
1523 int proc_dostring(struct ctl_table *table, int write,
1524                   void *buffer, size_t *lenp, loff_t *ppos)
1525 {
1526         return -ENOSYS;
1527 }
1528
1529 int proc_dobool(struct ctl_table *table, int write,
1530                 void *buffer, size_t *lenp, loff_t *ppos)
1531 {
1532         return -ENOSYS;
1533 }
1534
1535 int proc_dointvec(struct ctl_table *table, int write,
1536                   void *buffer, size_t *lenp, loff_t *ppos)
1537 {
1538         return -ENOSYS;
1539 }
1540
1541 int proc_douintvec(struct ctl_table *table, int write,
1542                   void *buffer, size_t *lenp, loff_t *ppos)
1543 {
1544         return -ENOSYS;
1545 }
1546
1547 int proc_dointvec_minmax(struct ctl_table *table, int write,
1548                     void *buffer, size_t *lenp, loff_t *ppos)
1549 {
1550         return -ENOSYS;
1551 }
1552
1553 int proc_douintvec_minmax(struct ctl_table *table, int write,
1554                           void *buffer, size_t *lenp, loff_t *ppos)
1555 {
1556         return -ENOSYS;
1557 }
1558
1559 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1560                         void *buffer, size_t *lenp, loff_t *ppos)
1561 {
1562         return -ENOSYS;
1563 }
1564
1565 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1566                     void *buffer, size_t *lenp, loff_t *ppos)
1567 {
1568         return -ENOSYS;
1569 }
1570
1571 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1572                                     void *buffer, size_t *lenp, loff_t *ppos)
1573 {
1574         return -ENOSYS;
1575 }
1576
1577 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1578                     void *buffer, size_t *lenp, loff_t *ppos)
1579 {
1580         return -ENOSYS;
1581 }
1582
1583 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1584                              void *buffer, size_t *lenp, loff_t *ppos)
1585 {
1586         return -ENOSYS;
1587 }
1588
1589 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1590                     void *buffer, size_t *lenp, loff_t *ppos)
1591 {
1592         return -ENOSYS;
1593 }
1594
1595 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1596                                       void *buffer, size_t *lenp, loff_t *ppos)
1597 {
1598         return -ENOSYS;
1599 }
1600
1601 int proc_do_large_bitmap(struct ctl_table *table, int write,
1602                          void *buffer, size_t *lenp, loff_t *ppos)
1603 {
1604         return -ENOSYS;
1605 }
1606
1607 #endif /* CONFIG_PROC_SYSCTL */
1608
1609 #if defined(CONFIG_SYSCTL)
1610 int proc_do_static_key(struct ctl_table *table, int write,
1611                        void *buffer, size_t *lenp, loff_t *ppos)
1612 {
1613         struct static_key *key = (struct static_key *)table->data;
1614         static DEFINE_MUTEX(static_key_mutex);
1615         int val, ret;
1616         struct ctl_table tmp = {
1617                 .data   = &val,
1618                 .maxlen = sizeof(val),
1619                 .mode   = table->mode,
1620                 .extra1 = SYSCTL_ZERO,
1621                 .extra2 = SYSCTL_ONE,
1622         };
1623
1624         if (write && !capable(CAP_SYS_ADMIN))
1625                 return -EPERM;
1626
1627         mutex_lock(&static_key_mutex);
1628         val = static_key_enabled(key);
1629         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1630         if (write && !ret) {
1631                 if (val)
1632                         static_key_enable(key);
1633                 else
1634                         static_key_disable(key);
1635         }
1636         mutex_unlock(&static_key_mutex);
1637         return ret;
1638 }
1639
1640 static struct ctl_table kern_table[] = {
1641         {
1642                 .procname       = "panic",
1643                 .data           = &panic_timeout,
1644                 .maxlen         = sizeof(int),
1645                 .mode           = 0644,
1646                 .proc_handler   = proc_dointvec,
1647         },
1648 #ifdef CONFIG_PROC_SYSCTL
1649         {
1650                 .procname       = "tainted",
1651                 .maxlen         = sizeof(long),
1652                 .mode           = 0644,
1653                 .proc_handler   = proc_taint,
1654         },
1655         {
1656                 .procname       = "sysctl_writes_strict",
1657                 .data           = &sysctl_writes_strict,
1658                 .maxlen         = sizeof(int),
1659                 .mode           = 0644,
1660                 .proc_handler   = proc_dointvec_minmax,
1661                 .extra1         = SYSCTL_NEG_ONE,
1662                 .extra2         = SYSCTL_ONE,
1663         },
1664 #endif
1665         {
1666                 .procname       = "print-fatal-signals",
1667                 .data           = &print_fatal_signals,
1668                 .maxlen         = sizeof(int),
1669                 .mode           = 0644,
1670                 .proc_handler   = proc_dointvec,
1671         },
1672 #ifdef CONFIG_SPARC
1673         {
1674                 .procname       = "reboot-cmd",
1675                 .data           = reboot_command,
1676                 .maxlen         = 256,
1677                 .mode           = 0644,
1678                 .proc_handler   = proc_dostring,
1679         },
1680         {
1681                 .procname       = "stop-a",
1682                 .data           = &stop_a_enabled,
1683                 .maxlen         = sizeof (int),
1684                 .mode           = 0644,
1685                 .proc_handler   = proc_dointvec,
1686         },
1687         {
1688                 .procname       = "scons-poweroff",
1689                 .data           = &scons_pwroff,
1690                 .maxlen         = sizeof (int),
1691                 .mode           = 0644,
1692                 .proc_handler   = proc_dointvec,
1693         },
1694 #endif
1695 #ifdef CONFIG_SPARC64
1696         {
1697                 .procname       = "tsb-ratio",
1698                 .data           = &sysctl_tsb_ratio,
1699                 .maxlen         = sizeof (int),
1700                 .mode           = 0644,
1701                 .proc_handler   = proc_dointvec,
1702         },
1703 #endif
1704 #ifdef CONFIG_PARISC
1705         {
1706                 .procname       = "soft-power",
1707                 .data           = &pwrsw_enabled,
1708                 .maxlen         = sizeof (int),
1709                 .mode           = 0644,
1710                 .proc_handler   = proc_dointvec,
1711         },
1712 #endif
1713 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1714         {
1715                 .procname       = "unaligned-trap",
1716                 .data           = &unaligned_enabled,
1717                 .maxlen         = sizeof (int),
1718                 .mode           = 0644,
1719                 .proc_handler   = proc_dointvec,
1720         },
1721 #endif
1722 #ifdef CONFIG_STACK_TRACER
1723         {
1724                 .procname       = "stack_tracer_enabled",
1725                 .data           = &stack_tracer_enabled,
1726                 .maxlen         = sizeof(int),
1727                 .mode           = 0644,
1728                 .proc_handler   = stack_trace_sysctl,
1729         },
1730 #endif
1731 #ifdef CONFIG_TRACING
1732         {
1733                 .procname       = "ftrace_dump_on_oops",
1734                 .data           = &ftrace_dump_on_oops,
1735                 .maxlen         = sizeof(int),
1736                 .mode           = 0644,
1737                 .proc_handler   = proc_dointvec,
1738         },
1739         {
1740                 .procname       = "traceoff_on_warning",
1741                 .data           = &__disable_trace_on_warning,
1742                 .maxlen         = sizeof(__disable_trace_on_warning),
1743                 .mode           = 0644,
1744                 .proc_handler   = proc_dointvec,
1745         },
1746         {
1747                 .procname       = "tracepoint_printk",
1748                 .data           = &tracepoint_printk,
1749                 .maxlen         = sizeof(tracepoint_printk),
1750                 .mode           = 0644,
1751                 .proc_handler   = tracepoint_printk_sysctl,
1752         },
1753 #endif
1754 #ifdef CONFIG_MODULES
1755         {
1756                 .procname       = "modprobe",
1757                 .data           = &modprobe_path,
1758                 .maxlen         = KMOD_PATH_LEN,
1759                 .mode           = 0644,
1760                 .proc_handler   = proc_dostring,
1761         },
1762         {
1763                 .procname       = "modules_disabled",
1764                 .data           = &modules_disabled,
1765                 .maxlen         = sizeof(int),
1766                 .mode           = 0644,
1767                 /* only handle a transition from default "0" to "1" */
1768                 .proc_handler   = proc_dointvec_minmax,
1769                 .extra1         = SYSCTL_ONE,
1770                 .extra2         = SYSCTL_ONE,
1771         },
1772 #endif
1773 #ifdef CONFIG_UEVENT_HELPER
1774         {
1775                 .procname       = "hotplug",
1776                 .data           = &uevent_helper,
1777                 .maxlen         = UEVENT_HELPER_PATH_LEN,
1778                 .mode           = 0644,
1779                 .proc_handler   = proc_dostring,
1780         },
1781 #endif
1782 #ifdef CONFIG_MAGIC_SYSRQ
1783         {
1784                 .procname       = "sysrq",
1785                 .data           = NULL,
1786                 .maxlen         = sizeof (int),
1787                 .mode           = 0644,
1788                 .proc_handler   = sysrq_sysctl_handler,
1789         },
1790 #endif
1791 #ifdef CONFIG_PROC_SYSCTL
1792         {
1793                 .procname       = "cad_pid",
1794                 .data           = NULL,
1795                 .maxlen         = sizeof (int),
1796                 .mode           = 0600,
1797                 .proc_handler   = proc_do_cad_pid,
1798         },
1799 #endif
1800         {
1801                 .procname       = "threads-max",
1802                 .data           = NULL,
1803                 .maxlen         = sizeof(int),
1804                 .mode           = 0644,
1805                 .proc_handler   = sysctl_max_threads,
1806         },
1807         {
1808                 .procname       = "usermodehelper",
1809                 .mode           = 0555,
1810                 .child          = usermodehelper_table,
1811         },
1812         {
1813                 .procname       = "overflowuid",
1814                 .data           = &overflowuid,
1815                 .maxlen         = sizeof(int),
1816                 .mode           = 0644,
1817                 .proc_handler   = proc_dointvec_minmax,
1818                 .extra1         = SYSCTL_ZERO,
1819                 .extra2         = SYSCTL_MAXOLDUID,
1820         },
1821         {
1822                 .procname       = "overflowgid",
1823                 .data           = &overflowgid,
1824                 .maxlen         = sizeof(int),
1825                 .mode           = 0644,
1826                 .proc_handler   = proc_dointvec_minmax,
1827                 .extra1         = SYSCTL_ZERO,
1828                 .extra2         = SYSCTL_MAXOLDUID,
1829         },
1830 #ifdef CONFIG_S390
1831         {
1832                 .procname       = "userprocess_debug",
1833                 .data           = &show_unhandled_signals,
1834                 .maxlen         = sizeof(int),
1835                 .mode           = 0644,
1836                 .proc_handler   = proc_dointvec,
1837         },
1838 #endif
1839         {
1840                 .procname       = "pid_max",
1841                 .data           = &pid_max,
1842                 .maxlen         = sizeof (int),
1843                 .mode           = 0644,
1844                 .proc_handler   = proc_dointvec_minmax,
1845                 .extra1         = &pid_max_min,
1846                 .extra2         = &pid_max_max,
1847         },
1848         {
1849                 .procname       = "panic_on_oops",
1850                 .data           = &panic_on_oops,
1851                 .maxlen         = sizeof(int),
1852                 .mode           = 0644,
1853                 .proc_handler   = proc_dointvec,
1854         },
1855         {
1856                 .procname       = "panic_print",
1857                 .data           = &panic_print,
1858                 .maxlen         = sizeof(unsigned long),
1859                 .mode           = 0644,
1860                 .proc_handler   = proc_doulongvec_minmax,
1861         },
1862         {
1863                 .procname       = "ngroups_max",
1864                 .data           = (void *)&ngroups_max,
1865                 .maxlen         = sizeof (int),
1866                 .mode           = 0444,
1867                 .proc_handler   = proc_dointvec,
1868         },
1869         {
1870                 .procname       = "cap_last_cap",
1871                 .data           = (void *)&cap_last_cap,
1872                 .maxlen         = sizeof(int),
1873                 .mode           = 0444,
1874                 .proc_handler   = proc_dointvec,
1875         },
1876 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1877         {
1878                 .procname       = "unknown_nmi_panic",
1879                 .data           = &unknown_nmi_panic,
1880                 .maxlen         = sizeof (int),
1881                 .mode           = 0644,
1882                 .proc_handler   = proc_dointvec,
1883         },
1884 #endif
1885
1886 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1887         defined(CONFIG_DEBUG_STACKOVERFLOW)
1888         {
1889                 .procname       = "panic_on_stackoverflow",
1890                 .data           = &sysctl_panic_on_stackoverflow,
1891                 .maxlen         = sizeof(int),
1892                 .mode           = 0644,
1893                 .proc_handler   = proc_dointvec,
1894         },
1895 #endif
1896 #if defined(CONFIG_X86)
1897         {
1898                 .procname       = "panic_on_unrecovered_nmi",
1899                 .data           = &panic_on_unrecovered_nmi,
1900                 .maxlen         = sizeof(int),
1901                 .mode           = 0644,
1902                 .proc_handler   = proc_dointvec,
1903         },
1904         {
1905                 .procname       = "panic_on_io_nmi",
1906                 .data           = &panic_on_io_nmi,
1907                 .maxlen         = sizeof(int),
1908                 .mode           = 0644,
1909                 .proc_handler   = proc_dointvec,
1910         },
1911         {
1912                 .procname       = "bootloader_type",
1913                 .data           = &bootloader_type,
1914                 .maxlen         = sizeof (int),
1915                 .mode           = 0444,
1916                 .proc_handler   = proc_dointvec,
1917         },
1918         {
1919                 .procname       = "bootloader_version",
1920                 .data           = &bootloader_version,
1921                 .maxlen         = sizeof (int),
1922                 .mode           = 0444,
1923                 .proc_handler   = proc_dointvec,
1924         },
1925         {
1926                 .procname       = "io_delay_type",
1927                 .data           = &io_delay_type,
1928                 .maxlen         = sizeof(int),
1929                 .mode           = 0644,
1930                 .proc_handler   = proc_dointvec,
1931         },
1932 #endif
1933 #if defined(CONFIG_MMU)
1934         {
1935                 .procname       = "randomize_va_space",
1936                 .data           = &randomize_va_space,
1937                 .maxlen         = sizeof(int),
1938                 .mode           = 0644,
1939                 .proc_handler   = proc_dointvec,
1940         },
1941 #endif
1942 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1943         {
1944                 .procname       = "spin_retry",
1945                 .data           = &spin_retry,
1946                 .maxlen         = sizeof (int),
1947                 .mode           = 0644,
1948                 .proc_handler   = proc_dointvec,
1949         },
1950 #endif
1951 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1952         {
1953                 .procname       = "acpi_video_flags",
1954                 .data           = &acpi_realmode_flags,
1955                 .maxlen         = sizeof (unsigned long),
1956                 .mode           = 0644,
1957                 .proc_handler   = proc_doulongvec_minmax,
1958         },
1959 #endif
1960 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1961         {
1962                 .procname       = "ignore-unaligned-usertrap",
1963                 .data           = &no_unaligned_warning,
1964                 .maxlen         = sizeof (int),
1965                 .mode           = 0644,
1966                 .proc_handler   = proc_dointvec,
1967         },
1968 #endif
1969 #ifdef CONFIG_IA64
1970         {
1971                 .procname       = "unaligned-dump-stack",
1972                 .data           = &unaligned_dump_stack,
1973                 .maxlen         = sizeof (int),
1974                 .mode           = 0644,
1975                 .proc_handler   = proc_dointvec,
1976         },
1977 #endif
1978 #ifdef CONFIG_RT_MUTEXES
1979         {
1980                 .procname       = "max_lock_depth",
1981                 .data           = &max_lock_depth,
1982                 .maxlen         = sizeof(int),
1983                 .mode           = 0644,
1984                 .proc_handler   = proc_dointvec,
1985         },
1986 #endif
1987 #ifdef CONFIG_KEYS
1988         {
1989                 .procname       = "keys",
1990                 .mode           = 0555,
1991                 .child          = key_sysctls,
1992         },
1993 #endif
1994 #ifdef CONFIG_PERF_EVENTS
1995         /*
1996          * User-space scripts rely on the existence of this file
1997          * as a feature check for perf_events being enabled.
1998          *
1999          * So it's an ABI, do not remove!
2000          */
2001         {
2002                 .procname       = "perf_event_paranoid",
2003                 .data           = &sysctl_perf_event_paranoid,
2004                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
2005                 .mode           = 0644,
2006                 .proc_handler   = proc_dointvec,
2007         },
2008         {
2009                 .procname       = "perf_event_mlock_kb",
2010                 .data           = &sysctl_perf_event_mlock,
2011                 .maxlen         = sizeof(sysctl_perf_event_mlock),
2012                 .mode           = 0644,
2013                 .proc_handler   = proc_dointvec,
2014         },
2015         {
2016                 .procname       = "perf_event_max_sample_rate",
2017                 .data           = &sysctl_perf_event_sample_rate,
2018                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
2019                 .mode           = 0644,
2020                 .proc_handler   = perf_proc_update_handler,
2021                 .extra1         = SYSCTL_ONE,
2022         },
2023         {
2024                 .procname       = "perf_cpu_time_max_percent",
2025                 .data           = &sysctl_perf_cpu_time_max_percent,
2026                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
2027                 .mode           = 0644,
2028                 .proc_handler   = perf_cpu_time_max_percent_handler,
2029                 .extra1         = SYSCTL_ZERO,
2030                 .extra2         = SYSCTL_ONE_HUNDRED,
2031         },
2032         {
2033                 .procname       = "perf_event_max_stack",
2034                 .data           = &sysctl_perf_event_max_stack,
2035                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
2036                 .mode           = 0644,
2037                 .proc_handler   = perf_event_max_stack_handler,
2038                 .extra1         = SYSCTL_ZERO,
2039                 .extra2         = (void *)&six_hundred_forty_kb,
2040         },
2041         {
2042                 .procname       = "perf_event_max_contexts_per_stack",
2043                 .data           = &sysctl_perf_event_max_contexts_per_stack,
2044                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
2045                 .mode           = 0644,
2046                 .proc_handler   = perf_event_max_stack_handler,
2047                 .extra1         = SYSCTL_ZERO,
2048                 .extra2         = SYSCTL_ONE_THOUSAND,
2049         },
2050 #endif
2051         {
2052                 .procname       = "panic_on_warn",
2053                 .data           = &panic_on_warn,
2054                 .maxlen         = sizeof(int),
2055                 .mode           = 0644,
2056                 .proc_handler   = proc_dointvec_minmax,
2057                 .extra1         = SYSCTL_ZERO,
2058                 .extra2         = SYSCTL_ONE,
2059         },
2060 #ifdef CONFIG_TREE_RCU
2061         {
2062                 .procname       = "panic_on_rcu_stall",
2063                 .data           = &sysctl_panic_on_rcu_stall,
2064                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
2065                 .mode           = 0644,
2066                 .proc_handler   = proc_dointvec_minmax,
2067                 .extra1         = SYSCTL_ZERO,
2068                 .extra2         = SYSCTL_ONE,
2069         },
2070         {
2071                 .procname       = "max_rcu_stall_to_panic",
2072                 .data           = &sysctl_max_rcu_stall_to_panic,
2073                 .maxlen         = sizeof(sysctl_max_rcu_stall_to_panic),
2074                 .mode           = 0644,
2075                 .proc_handler   = proc_dointvec_minmax,
2076                 .extra1         = SYSCTL_ONE,
2077                 .extra2         = SYSCTL_INT_MAX,
2078         },
2079 #endif
2080         { }
2081 };
2082
/*
 * vm_table - sysctl entries passed to DECLARE_SYSCTL_BASE(vm, vm_table)
 * further down in this file.
 *
 * Every entry gives a file name (.procname), the object it is backed by
 * (.data, NULL for several entries that use a dedicated handler), a size
 * (.maxlen), the permission bits (.mode) and the handler (.proc_handler).
 * Some entries additionally set .extra1 and/or .extra2.
 * Entries wrapped in #ifdef/#if are only built for matching configurations.
 *
 * NOTE(review): .extra1/.extra2 presumably carry the lower/upper bounds
 * used by the *_minmax handlers, and the empty entry at the end presumably
 * terminates the table - confirm against the ctl_table documentation.
 */
2083 static struct ctl_table vm_table[] = {
             /* Overcommit controls; each one goes through its own overcommit_*_handler. */
2084         {
2085                 .procname       = "overcommit_memory",
2086                 .data           = &sysctl_overcommit_memory,
2087                 .maxlen         = sizeof(sysctl_overcommit_memory),
2088                 .mode           = 0644,
2089                 .proc_handler   = overcommit_policy_handler,
2090                 .extra1         = SYSCTL_ZERO,
2091                 .extra2         = SYSCTL_TWO,
2092         },
2093         {
2094                 .procname       = "overcommit_ratio",
2095                 .data           = &sysctl_overcommit_ratio,
2096                 .maxlen         = sizeof(sysctl_overcommit_ratio),
2097                 .mode           = 0644,
2098                 .proc_handler   = overcommit_ratio_handler,
2099         },
2100         {
2101                 .procname       = "overcommit_kbytes",
2102                 .data           = &sysctl_overcommit_kbytes,
2103                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
2104                 .mode           = 0644,
2105                 .proc_handler   = overcommit_kbytes_handler,
2106         },
             /* .extra2 points at page_cluster_max instead of a SYSCTL_* constant. */
2107         {
2108                 .procname       = "page-cluster",
2109                 .data           = &page_cluster,
2110                 .maxlen         = sizeof(int),
2111                 .mode           = 0644,
2112                 .proc_handler   = proc_dointvec_minmax,
2113                 .extra1         = SYSCTL_ZERO,
2114                 .extra2         = (void *)&page_cluster_max,
2115         },
2116         {
2117                 .procname       = "dirtytime_expire_seconds",
2118                 .data           = &dirtytime_expire_interval,
2119                 .maxlen         = sizeof(dirtytime_expire_interval),
2120                 .mode           = 0644,
2121                 .proc_handler   = dirtytime_interval_handler,
2122                 .extra1         = SYSCTL_ZERO,
2123         },
2124         {
2125                 .procname       = "swappiness",
2126                 .data           = &vm_swappiness,
2127                 .maxlen         = sizeof(vm_swappiness),
2128                 .mode           = 0644,
2129                 .proc_handler   = proc_dointvec_minmax,
2130                 .extra1         = SYSCTL_ZERO,
2131                 .extra2         = SYSCTL_TWO_HUNDRED,
2132         },
2133 #ifdef CONFIG_NUMA
2134         {
2135                 .procname       = "numa_stat",
2136                 .data           = &sysctl_vm_numa_stat,
2137                 .maxlen         = sizeof(int),
2138                 .mode           = 0644,
2139                 .proc_handler   = sysctl_vm_numa_stat_handler,
2140                 .extra1         = SYSCTL_ZERO,
2141                 .extra2         = SYSCTL_ONE,
2142         },
2143 #endif
2144 #ifdef CONFIG_HUGETLB_PAGE
2145         {
2146                 .procname       = "nr_hugepages",
2147                 .data           = NULL,
2148                 .maxlen         = sizeof(unsigned long),
2149                 .mode           = 0644,
2150                 .proc_handler   = hugetlb_sysctl_handler,
2151         },
             /* Nested guard: built only with both CONFIG_HUGETLB_PAGE and CONFIG_NUMA. */
2152 #ifdef CONFIG_NUMA
2153         {
2154                 .procname       = "nr_hugepages_mempolicy",
2155                 .data           = NULL,
2156                 .maxlen         = sizeof(unsigned long),
2157                 .mode           = 0644,
2158                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2159         },
2160 #endif
             /*
              * NOTE(review): sized as gid_t but handled by proc_dointvec -
              * this assumes gid_t is int-sized; confirm.
              */
2161          {
2162                 .procname       = "hugetlb_shm_group",
2163                 .data           = &sysctl_hugetlb_shm_group,
2164                 .maxlen         = sizeof(gid_t),
2165                 .mode           = 0644,
2166                 .proc_handler   = proc_dointvec,
2167          },
2168         {
2169                 .procname       = "nr_overcommit_hugepages",
2170                 .data           = NULL,
2171                 .maxlen         = sizeof(unsigned long),
2172                 .mode           = 0644,
2173                 .proc_handler   = hugetlb_overcommit_handler,
2174         },
2175 #endif
2176         {
2177                 .procname       = "lowmem_reserve_ratio",
2178                 .data           = &sysctl_lowmem_reserve_ratio,
2179                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
2180                 .mode           = 0644,
2181                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
2182         },
             /* Mode 0200: writable by the owner, readable by nobody. */
2183         {
2184                 .procname       = "drop_caches",
2185                 .data           = &sysctl_drop_caches,
2186                 .maxlen         = sizeof(int),
2187                 .mode           = 0200,
2188                 .proc_handler   = drop_caches_sysctl_handler,
2189                 .extra1         = SYSCTL_ONE,
2190                 .extra2         = SYSCTL_FOUR,
2191         },
2192 #ifdef CONFIG_COMPACTION
             /* Also mode 0200 (owner write only), with no backing variable. */
2193         {
2194                 .procname       = "compact_memory",
2195                 .data           = NULL,
2196                 .maxlen         = sizeof(int),
2197                 .mode           = 0200,
2198                 .proc_handler   = sysctl_compaction_handler,
2199         },
2200         {
2201                 .procname       = "compaction_proactiveness",
2202                 .data           = &sysctl_compaction_proactiveness,
2203                 .maxlen         = sizeof(sysctl_compaction_proactiveness),
2204                 .mode           = 0644,
2205                 .proc_handler   = compaction_proactiveness_sysctl_handler,
2206                 .extra1         = SYSCTL_ZERO,
2207                 .extra2         = SYSCTL_ONE_HUNDRED,
2208         },
2209         {
2210                 .procname       = "extfrag_threshold",
2211                 .data           = &sysctl_extfrag_threshold,
2212                 .maxlen         = sizeof(int),
2213                 .mode           = 0644,
2214                 .proc_handler   = proc_dointvec_minmax,
2215                 .extra1         = SYSCTL_ZERO,
2216                 .extra2         = SYSCTL_ONE_THOUSAND,
2217         },
2218         {
2219                 .procname       = "compact_unevictable_allowed",
2220                 .data           = &sysctl_compact_unevictable_allowed,
2221                 .maxlen         = sizeof(int),
2222                 .mode           = 0644,
2223                 .proc_handler   = proc_dointvec_minmax_warn_RT_change,
2224                 .extra1         = SYSCTL_ZERO,
2225                 .extra2         = SYSCTL_ONE,
2226         },
2227
2228 #endif /* CONFIG_COMPACTION */
2229         {
2230                 .procname       = "min_free_kbytes",
2231                 .data           = &min_free_kbytes,
2232                 .maxlen         = sizeof(min_free_kbytes),
2233                 .mode           = 0644,
2234                 .proc_handler   = min_free_kbytes_sysctl_handler,
2235                 .extra1         = SYSCTL_ZERO,
2236         },
2237         {
2238                 .procname       = "watermark_boost_factor",
2239                 .data           = &watermark_boost_factor,
2240                 .maxlen         = sizeof(watermark_boost_factor),
2241                 .mode           = 0644,
2242                 .proc_handler   = proc_dointvec_minmax,
2243                 .extra1         = SYSCTL_ZERO,
2244         },
2245         {
2246                 .procname       = "watermark_scale_factor",
2247                 .data           = &watermark_scale_factor,
2248                 .maxlen         = sizeof(watermark_scale_factor),
2249                 .mode           = 0644,
2250                 .proc_handler   = watermark_scale_factor_sysctl_handler,
2251                 .extra1         = SYSCTL_ONE,
2252                 .extra2         = SYSCTL_THREE_THOUSAND,
2253         },
2254         {
2255                 .procname       = "percpu_pagelist_high_fraction",
2256                 .data           = &percpu_pagelist_high_fraction,
2257                 .maxlen         = sizeof(percpu_pagelist_high_fraction),
2258                 .mode           = 0644,
2259                 .proc_handler   = percpu_pagelist_high_fraction_sysctl_handler,
2260                 .extra1         = SYSCTL_ZERO,
2261         },
2262         {
2263                 .procname       = "page_lock_unfairness",
2264                 .data           = &sysctl_page_lock_unfairness,
2265                 .maxlen         = sizeof(sysctl_page_lock_unfairness),
2266                 .mode           = 0644,
2267                 .proc_handler   = proc_dointvec_minmax,
2268                 .extra1         = SYSCTL_ZERO,
2269         },
             /*
              * Exactly one of the next two entries is built: max_map_count
              * when CONFIG_MMU is set, nr_trim_pages when it is not.
              */
2270 #ifdef CONFIG_MMU
2271         {
2272                 .procname       = "max_map_count",
2273                 .data           = &sysctl_max_map_count,
2274                 .maxlen         = sizeof(sysctl_max_map_count),
2275                 .mode           = 0644,
2276                 .proc_handler   = proc_dointvec_minmax,
2277                 .extra1         = SYSCTL_ZERO,
2278         },
2279 #else
2280         {
2281                 .procname       = "nr_trim_pages",
2282                 .data           = &sysctl_nr_trim_pages,
2283                 .maxlen         = sizeof(sysctl_nr_trim_pages),
2284                 .mode           = 0644,
2285                 .proc_handler   = proc_dointvec_minmax,
2286                 .extra1         = SYSCTL_ZERO,
2287         },
2288 #endif
2289         {
2290                 .procname       = "vfs_cache_pressure",
2291                 .data           = &sysctl_vfs_cache_pressure,
2292                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
2293                 .mode           = 0644,
2294                 .proc_handler   = proc_dointvec_minmax,
2295                 .extra1         = SYSCTL_ZERO,
2296         },
2297 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2298     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2299         {
2300                 .procname       = "legacy_va_layout",
2301                 .data           = &sysctl_legacy_va_layout,
2302                 .maxlen         = sizeof(sysctl_legacy_va_layout),
2303                 .mode           = 0644,
2304                 .proc_handler   = proc_dointvec_minmax,
2305                 .extra1         = SYSCTL_ZERO,
2306         },
2307 #endif
2308 #ifdef CONFIG_NUMA
             /* The file is named zone_reclaim_mode but is backed by node_reclaim_mode. */
2309         {
2310                 .procname       = "zone_reclaim_mode",
2311                 .data           = &node_reclaim_mode,
2312                 .maxlen         = sizeof(node_reclaim_mode),
2313                 .mode           = 0644,
2314                 .proc_handler   = proc_dointvec_minmax,
2315                 .extra1         = SYSCTL_ZERO,
2316         },
2317         {
2318                 .procname       = "min_unmapped_ratio",
2319                 .data           = &sysctl_min_unmapped_ratio,
2320                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
2321                 .mode           = 0644,
2322                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
2323                 .extra1         = SYSCTL_ZERO,
2324                 .extra2         = SYSCTL_ONE_HUNDRED,
2325         },
2326         {
2327                 .procname       = "min_slab_ratio",
2328                 .data           = &sysctl_min_slab_ratio,
2329                 .maxlen         = sizeof(sysctl_min_slab_ratio),
2330                 .mode           = 0644,
2331                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
2332                 .extra1         = SYSCTL_ZERO,
2333                 .extra2         = SYSCTL_ONE_HUNDRED,
2334         },
2335 #endif
2336 #ifdef CONFIG_SMP
2337         {
2338                 .procname       = "stat_interval",
2339                 .data           = &sysctl_stat_interval,
2340                 .maxlen         = sizeof(sysctl_stat_interval),
2341                 .mode           = 0644,
2342                 .proc_handler   = proc_dointvec_jiffies,
2343         },
             /*
              * No backing variable and a zero .maxlen; vmstat_refresh is the
              * handler. Mode 0600 limits access to the file owner.
              */
2344         {
2345                 .procname       = "stat_refresh",
2346                 .data           = NULL,
2347                 .maxlen         = 0,
2348                 .mode           = 0600,
2349                 .proc_handler   = vmstat_refresh,
2350         },
2351 #endif
2352 #ifdef CONFIG_MMU
             /* The file is named mmap_min_addr but is backed by dac_mmap_min_addr. */
2353         {
2354                 .procname       = "mmap_min_addr",
2355                 .data           = &dac_mmap_min_addr,
2356                 .maxlen         = sizeof(unsigned long),
2357                 .mode           = 0644,
2358                 .proc_handler   = mmap_min_addr_handler,
2359         },
2360 #endif
2361 #ifdef CONFIG_NUMA
2362         {
2363                 .procname       = "numa_zonelist_order",
2364                 .data           = &numa_zonelist_order,
2365                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
2366                 .mode           = 0644,
2367                 .proc_handler   = numa_zonelist_order_handler,
2368         },
2369 #endif
             /*
              * vdso_enabled is backed by vdso32_enabled on CONFIG_X86_32 and
              * by vdso_enabled otherwise.
              * NOTE(review): .extra1 is set although the handler is plain
              * proc_dointvec rather than a _minmax variant - check whether
              * that bound is actually honoured.
              */
2370 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2371    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2372         {
2373                 .procname       = "vdso_enabled",
2374 #ifdef CONFIG_X86_32
2375                 .data           = &vdso32_enabled,
2376                 .maxlen         = sizeof(vdso32_enabled),
2377 #else
2378                 .data           = &vdso_enabled,
2379                 .maxlen         = sizeof(vdso_enabled),
2380 #endif
2381                 .mode           = 0644,
2382                 .proc_handler   = proc_dointvec,
2383                 .extra1         = SYSCTL_ZERO,
2384         },
2385 #endif
2386 #ifdef CONFIG_MEMORY_FAILURE
2387         {
2388                 .procname       = "memory_failure_early_kill",
2389                 .data           = &sysctl_memory_failure_early_kill,
2390                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
2391                 .mode           = 0644,
2392                 .proc_handler   = proc_dointvec_minmax,
2393                 .extra1         = SYSCTL_ZERO,
2394                 .extra2         = SYSCTL_ONE,
2395         },
2396         {
2397                 .procname       = "memory_failure_recovery",
2398                 .data           = &sysctl_memory_failure_recovery,
2399                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
2400                 .mode           = 0644,
2401                 .proc_handler   = proc_dointvec_minmax,
2402                 .extra1         = SYSCTL_ZERO,
2403                 .extra2         = SYSCTL_ONE,
2404         },
2405 #endif
2406         {
2407                 .procname       = "user_reserve_kbytes",
2408                 .data           = &sysctl_user_reserve_kbytes,
2409                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
2410                 .mode           = 0644,
2411                 .proc_handler   = proc_doulongvec_minmax,
2412         },
2413         {
2414                 .procname       = "admin_reserve_kbytes",
2415                 .data           = &sysctl_admin_reserve_kbytes,
2416                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
2417                 .mode           = 0644,
2418                 .proc_handler   = proc_doulongvec_minmax,
2419         },
             /*
              * Both mmap_rnd_* entries use mode 0600 (owner only) and take
              * .extra1/.extra2 from dedicated *_min/*_max objects.
              */
2420 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2421         {
2422                 .procname       = "mmap_rnd_bits",
2423                 .data           = &mmap_rnd_bits,
2424                 .maxlen         = sizeof(mmap_rnd_bits),
2425                 .mode           = 0600,
2426                 .proc_handler   = proc_dointvec_minmax,
2427                 .extra1         = (void *)&mmap_rnd_bits_min,
2428                 .extra2         = (void *)&mmap_rnd_bits_max,
2429         },
2430 #endif
2431 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2432         {
2433                 .procname       = "mmap_rnd_compat_bits",
2434                 .data           = &mmap_rnd_compat_bits,
2435                 .maxlen         = sizeof(mmap_rnd_compat_bits),
2436                 .mode           = 0600,
2437                 .proc_handler   = proc_dointvec_minmax,
2438                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
2439                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
2440         },
2441 #endif
2442 #ifdef CONFIG_USERFAULTFD
2443         {
2444                 .procname       = "unprivileged_userfaultfd",
2445                 .data           = &sysctl_unprivileged_userfaultfd,
2446                 .maxlen         = sizeof(sysctl_unprivileged_userfaultfd),
2447                 .mode           = 0644,
2448                 .proc_handler   = proc_dointvec_minmax,
2449                 .extra1         = SYSCTL_ZERO,
2450                 .extra2         = SYSCTL_ONE,
2451         },
2452 #endif
2453         { }
2454 };
2455
/*
 * debug_table - sysctl entries passed to
 * DECLARE_SYSCTL_BASE(debug, debug_table) further down in this file.
 *
 * Holds at most one file, "exception-trace", backed by
 * show_unhandled_signals and built only with CONFIG_SYSCTL_EXCEPTION_TRACE.
 * Without that option only the empty final entry remains.
 */
2456 static struct ctl_table debug_table[] = {
2457 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2458         {
2459                 .procname       = "exception-trace",
2460                 .data           = &show_unhandled_signals,
2461                 .maxlen         = sizeof(int),
2462                 .mode           = 0644,
2463                 .proc_handler   = proc_dointvec
2464         },
2465 #endif
2466         { }
2467 };
2468
/*
 * dev_table - holds no files of its own, only the empty final entry.
 * It is passed to DECLARE_SYSCTL_BASE(dev, dev_table) further down.
 * NOTE(review): presumably kept so that a "dev" base exists for entries
 * registered from elsewhere - confirm.
 */
2469 static struct ctl_table dev_table[] = {
2470         { }
2471 };
2472
/*
 * Pair each base name (kernel, vm, debug, dev) with its table.
 * sysctl_init_bases() below passes the same four names to
 * register_sysctl_base().
 */
2473 DECLARE_SYSCTL_BASE(kernel, kern_table);
2474 DECLARE_SYSCTL_BASE(vm, vm_table);
2475 DECLARE_SYSCTL_BASE(debug, debug_table);
2476 DECLARE_SYSCTL_BASE(dev, dev_table);
2477
/*
 * sysctl_init_bases - register the four sysctl bases declared above.
 *
 * Invokes register_sysctl_base() for kernel, vm, debug and dev, in that
 * order. Whatever those invocations yield is not inspected here.
 *
 * Return: always 0.
 */
2478 int __init sysctl_init_bases(void)
2479 {
2480         register_sysctl_base(kernel);
2481         register_sysctl_base(vm);
2482         register_sysctl_base(debug);
2483         register_sysctl_base(dev);
2484
2485         return 0;
2486 }
2487 #endif /* CONFIG_SYSCTL */
2488 /*
2489  * The exports below are grouped here instead of following each symbol
2490  * definition: doing that would mean writing them twice, so an exception
      * to the usual placement is granted :-)
      *
      * NOTE(review): "twice" presumably refers to the handlers having two
      * alternative definitions elsewhere in this file - confirm.
      *
      * proc_douintvec_minmax is the only entry exported with
      * EXPORT_SYMBOL_GPL; every other entry uses EXPORT_SYMBOL.
2491  */
2492 EXPORT_SYMBOL(proc_dobool);
2493 EXPORT_SYMBOL(proc_dointvec);
2494 EXPORT_SYMBOL(proc_douintvec);
2495 EXPORT_SYMBOL(proc_dointvec_jiffies);
2496 EXPORT_SYMBOL(proc_dointvec_minmax);
2497 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
2498 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2499 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2500 EXPORT_SYMBOL(proc_dostring);
2501 EXPORT_SYMBOL(proc_doulongvec_minmax);
2502 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2503 EXPORT_SYMBOL(proc_do_large_bitmap);